files into a sensible directory hierarchy.
Signed-off-by: Keir Fraser <keir@xensource.com>
goto error_out;
}
- /* HVM domains must be put into shadow2 mode at the start of day */
+ /* HVM domains must be put into shadow mode at the start of day */
if ( xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_ENABLE,
NULL, 0, NULL,
XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT |
subdir-y += cpu
subdir-y += genapic
subdir-y += hvm
+subdir-y += mm
subdir-y += oprofile
subdir-$(x86_32) += x86_32
obj-y += usercopy.o
obj-y += x86_emulate.o
-ifneq ($(pae),n)
-obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s3.o shadow2_g3_on_s3.o
-else
-obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s2.o
-endif
-
-obj-$(x86_64) += shadow2-common.o shadow2_g4_on_s4.o shadow2_g3_on_s3.o \
- shadow2_g2_on_s3.o
-
-guest_levels = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
-shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
-shadow2_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
- -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
-
-shadow2_%.o: shadow2.c $(HDRS) Makefile
- $(CC) $(CFLAGS) $(call shadow2_defns,$(@F)) -c $< -o $@
-
obj-$(crash_debug) += gdbstub.o
$(TARGET): $(TARGET)-syms boot/mkelf32
boot/mkelf32: boot/mkelf32.c
$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
-shadow_guest32.o: shadow.c
-shadow_guest32pae.o: shadow.c
-
.PHONY: clean
clean::
rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
#endif /* __x86_64__ */
- shadow2_lock_init(d);
- for ( i = 0; i <= SHADOW2_MAX_ORDER; i++ )
- INIT_LIST_HEAD(&d->arch.shadow2.freelists[i]);
- INIT_LIST_HEAD(&d->arch.shadow2.p2m_freelist);
- INIT_LIST_HEAD(&d->arch.shadow2.p2m_inuse);
- INIT_LIST_HEAD(&d->arch.shadow2.toplevel_shadows);
+ shadow_lock_init(d);
+ for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
+ INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
+ INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
+ INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
+ INIT_LIST_HEAD(&d->arch.shadow.toplevel_shadows);
if ( !is_idle_domain(d) )
{
void arch_domain_destroy(struct domain *d)
{
- shadow2_final_teardown(d);
+ shadow_final_teardown(d);
free_xenheap_pages(
d->arch.mm_perdomain_pt,
}
}
- /* Shadow2: make sure the domain has enough shadow memory to
+ /* Shadow: make sure the domain has enough shadow memory to
* boot another vcpu */
- if ( shadow2_mode_enabled(d)
- && d->arch.shadow2.total_pages < shadow2_min_acceptable_pages(d) )
+ if ( shadow_mode_enabled(d)
+ && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) )
{
destroy_gdt(v);
return -ENOMEM;
/* Don't redo final setup */
set_bit(_VCPUF_initialised, &v->vcpu_flags);
- if ( shadow2_mode_enabled(d) )
- shadow2_update_paging_modes(v);
+ if ( shadow_mode_enabled(d) )
+ shadow_update_paging_modes(v);
update_cr3(v);
for_each_vcpu ( d, v )
{
/* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling,
- * or sh2_update_paging_modes()) */
+ * or sh_update_paging_modes()) */
pfn = pagetable_get_pfn(v->arch.guest_table);
if ( pfn != 0 )
{
- if ( shadow2_mode_refcounts(d) )
+ if ( shadow_mode_refcounts(d) )
put_page(mfn_to_page(pfn));
else
put_page_and_type(mfn_to_page(pfn));
hvm_relinquish_guest_resources(d);
/* Tear down shadow mode stuff. */
- shadow2_teardown(d);
+ shadow_teardown(d);
/*
* Relinquish GDT mappings. No need for explicit unmapping of the LDT as
void arch_dump_domain_info(struct domain *d)
{
- if ( shadow2_mode_enabled(d) )
+ if ( shadow_mode_enabled(d) )
{
- printk(" shadow2 mode: ");
- if ( d->arch.shadow2.mode & SHM2_enable )
+ printk(" shadow mode: ");
+ if ( d->arch.shadow.mode & SHM2_enable )
printk("enabled ");
- if ( shadow2_mode_refcounts(d) )
+ if ( shadow_mode_refcounts(d) )
printk("refcounts ");
- if ( shadow2_mode_log_dirty(d) )
+ if ( shadow_mode_log_dirty(d) )
printk("log_dirty ");
- if ( shadow2_mode_translate(d) )
+ if ( shadow_mode_translate(d) )
printk("translate ");
- if ( shadow2_mode_external(d) )
+ if ( shadow_mode_external(d) )
printk("external ");
printk("\n");
}
(void)alloc_vcpu(d, i, i);
/* Set up CR3 value for write_ptbase */
- if ( shadow2_mode_enabled(v->domain) )
- shadow2_update_paging_modes(v);
+ if ( shadow_mode_enabled(v->domain) )
+ shadow_update_paging_modes(v);
else
update_cr3(v);
new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
if ( opt_dom0_shadow )
- if ( shadow2_test_enable(d) == 0 )
- shadow2_update_paging_modes(v);
+ if ( shadow_test_enable(d) == 0 )
+ shadow_update_paging_modes(v);
if ( supervisor_mode_kernel )
{
d = find_domain_by_id(domctl->domain);
if ( d != NULL )
{
- ret = shadow2_domctl(d, &domctl->u.shadow_op, u_domctl);
+ ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
put_domain(d);
copy_to_guest(u_domctl, domctl, 1);
}
if (count > size)
count = size;
- gfn = shadow2_gva_to_gfn(v, vaddr);
- mfn = mfn_x(sh2_vcpu_gfn_to_mfn(v, gfn));
+ gfn = shadow_gva_to_gfn(v, vaddr);
+ mfn = mfn_x(sh_vcpu_gfn_to_mfn(v, gfn));
if (mfn == INVALID_MFN)
return 0;
return;
}
- if ( current->arch.shadow2.mode->guest_levels == 4 )
+ if ( current->arch.shadow.mode->guest_levels == 4 )
{
pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
pregs->rsi,
if (pvalid) {
if (hvm_paging_enabled(current))
- p->u.data = shadow2_gva_to_gpa(current, value);
+ p->u.data = shadow_gva_to_gpa(current, value);
else
p->u.pdata = (void *) value; /* guest VA == guest PA */
} else
if (pvalid) {
if (hvm_paging_enabled(v))
- p->u.data = shadow2_gva_to_gpa(v, value);
+ p->u.data = shadow_gva_to_gpa(v, value);
else
p->u.pdata = (void *) value; /* guest VA == guest PA */
} else
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/io.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
if ( v != d->vcpu[0] )
return;
- if ( !shadow2_mode_external(d) )
+ if ( !shadow_mode_external(d) )
{
DPRINTK("Can't init HVM for dom %u vcpu %u: "
- "not in shadow2 external mode\n", d->domain_id, v->vcpu_id);
+ "not in shadow external mode\n", d->domain_id, v->vcpu_id);
domain_crash(d);
}
va, eip, (unsigned long)regs->error_code);
//#endif
- result = shadow2_fault(va, regs);
+ result = shadow_fault(va, regs);
if( result ) {
/* Let's make sure that the Guest TLB is flushed */
v->arch.guest_table = pagetable_from_pfn(mfn);
if ( old_base_mfn )
put_page(mfn_to_page(old_base_mfn));
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
(unsigned long) (mfn << PAGE_SHIFT));
svm_inject_exception(v, TRAP_gp_fault, 1, 0);
return 0;
}
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
}
else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
{
/* we should take care of this kind of situation */
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
}
mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
if (mfn != pagetable_get_pfn(v->arch.guest_table))
__hvm_bug(regs);
- shadow2_update_cr3(v);
+ shadow_update_cr3(v);
}
else
{
v->arch.guest_table = pagetable_from_pfn(mfn);
if ( old_base_mfn )
put_page(mfn_to_page(old_base_mfn));
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
(unsigned long) (mfn << PAGE_SHIFT));
if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
{
set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
}
break;
}
/* Overkill, we may not this */
set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
- shadow2_invlpg(v, g_vaddr);
+ shadow_invlpg(v, g_vaddr);
}
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
unsigned long gpa;
- gpa = shadow2_gva_to_gpa(current, gva);
+ gpa = shadow_gva_to_gpa(current, gva);
printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
if( !svm_paging_enabled(v) || mmio_space(gpa) )
return;
if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
{
if (svm_paging_enabled(v) &&
- !mmio_space(shadow2_gva_to_gpa(current, vmcb->exitinfo2)))
+ !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
{
printk("I%08ld,ExC=%s(%d),IP=%x:%llx,"
"I1=%llx,I2=%llx,INT=%llx, "
(unsigned long long) vmcb->exitinfo1,
(unsigned long long) vmcb->exitinfo2,
(unsigned long long) vmcb->exitintinfo.bytes,
- (unsigned long long) shadow2_gva_to_gpa(current, vmcb->exitinfo2));
+ (unsigned long long) shadow_gva_to_gpa(current, vmcb->exitinfo2));
}
else
{
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/keyhandler.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
static int vmcs_size;
static int vmcs_order;
error |= __vmwrite(GUEST_TR_BASE, 0);
error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
printk("%s(): GUEST_CR3<=%08lx, HOST_CR3<=%08lx\n",
__func__, v->arch.hvm_vcpu.hw_cr3, v->arch.cr3);
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vmx/cpu.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/vpic.h>
if ( v->vcpu_id != 0 )
return 1;
- if ( !shadow2_mode_external(d) )
+ if ( !shadow_mode_external(d) )
{
DPRINTK("Can't init HVM for dom %u vcpu %u: "
- "not in shadow2 external mode\n",
+ "not in shadow external mode\n",
d->domain_id, v->vcpu_id);
domain_crash(d);
}
}
#endif
- result = shadow2_fault(va, regs);
+ result = shadow_fault(va, regs);
TRACE_VMEXIT (2,result);
#if 0
* We do the safest things first, then try to update the shadow
* copying from guest
*/
- shadow2_invlpg(v, va);
+ shadow_invlpg(v, va);
}
skip_cr3:
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
if (!vmx_paging_enabled(v))
HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
else
v->arch.guest_table = pagetable_from_pfn(mfn);
if (old_base_mfn)
put_page(mfn_to_page(old_base_mfn));
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
(unsigned long) (mfn << PAGE_SHIFT));
else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
{
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
}
return 1;
mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
if (mfn != pagetable_get_pfn(v->arch.guest_table))
__hvm_bug(regs);
- shadow2_update_cr3(v);
+ shadow_update_cr3(v);
} else {
/*
* If different, make a shadow. Check if the PDBR is valid
* all TLB entries except global entries.
*/
if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
- shadow2_update_paging_modes(v);
+ shadow_update_paging_modes(v);
break;
}
default:
res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
- if ( !res && unlikely(shadow2_mode_refcounts(d)) )
+ if ( !res && unlikely(shadow_mode_refcounts(d)) )
{
- shadow2_lock(d);
- shadow2_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
+ shadow_lock(d);
+ shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
- shadow2_unlock(d);
+ shadow_unlock(d);
}
if ( unlikely(!res) )
struct page_info *page;
unsigned long pfn;
- ASSERT( !shadow2_mode_refcounts(d) );
+ ASSERT( !shadow_mode_refcounts(d) );
if ( (root_get_flags(re) & _PAGE_RW) )
{
d = dom_io;
}
- /* Foreign mappings into guests in shadow2 external mode don't
+ /* Foreign mappings into guests in shadow external mode don't
* contribute to writeable mapping refcounts. (This allows the
* qemu-dm helper process in dom0 to map the domain's memory without
* messing up the count of "real" writable mappings.) */
okay = (((l1e_get_flags(l1e) & _PAGE_RW) &&
- !(unlikely(shadow2_mode_external(d) && (d != current->domain))))
+ !(unlikely(shadow_mode_external(d) && (d != current->domain))))
? get_page_and_type(page, d, PGT_writable_page)
: get_page(page, d));
if ( !okay )
}
/* Remember we didn't take a type-count of foreign writable mappings
- * to shadow2 external domains */
+ * to shadow external domains */
if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
- !(unlikely((e != d) && shadow2_mode_external(e))) )
+ !(unlikely((e != d) && shadow_mode_external(e))) )
{
put_page_and_type(page);
}
l1_pgentry_t *pl1e;
int i;
- ASSERT(!shadow2_mode_refcounts(d));
+ ASSERT(!shadow_mode_refcounts(d));
pl1e = map_domain_page(pfn);
* a. alloc_l3_table() calls this function and this check will fail
* b. mod_l3_entry() disallows updates to slot 3 in an existing table
*
- * XXX -- this needs revisiting for shadow2_mode_refcount()==true...
+ * XXX -- this needs revisiting for shadow_mode_refcount()==true...
*/
page = l3e_get_page(l3e3);
BUG_ON(page->u.inuse.type_info & PGT_pinned);
l2_pgentry_t *pl2e;
int i;
- ASSERT(!shadow2_mode_refcounts(d));
+ ASSERT(!shadow_mode_refcounts(d));
pl2e = map_domain_page(pfn);
l3_pgentry_t *pl3e;
int i;
- ASSERT(!shadow2_mode_refcounts(d));
+ ASSERT(!shadow_mode_refcounts(d));
#ifdef CONFIG_X86_PAE
/*
unsigned long vaddr;
int i;
- ASSERT(!shadow2_mode_refcounts(d));
+ ASSERT(!shadow_mode_refcounts(d));
for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
{
struct vcpu *v)
{
int rv = 1;
- if ( unlikely(shadow2_mode_enabled(v->domain)) )
- shadow2_lock(v->domain);
+ if ( unlikely(shadow_mode_enabled(v->domain)) )
+ shadow_lock(v->domain);
#ifndef PTE_UPDATE_WITH_CMPXCHG
rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
#else
}
}
#endif
- if ( unlikely(shadow2_mode_enabled(v->domain)) )
+ if ( unlikely(shadow_mode_enabled(v->domain)) )
{
- shadow2_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
- shadow2_unlock(v->domain);
+ shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
+ shadow_unlock(v->domain);
}
return rv;
}
#endif
#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \
int rv; \
- if ( unlikely(shadow2_mode_enabled(current->domain)) ) \
- shadow2_lock(current->domain); \
+ if ( unlikely(shadow_mode_enabled(current->domain)) ) \
+ shadow_lock(current->domain); \
rv = _UPDATE_ENTRY(_t, _p, _o, _n); \
- if ( unlikely(shadow2_mode_enabled(current->domain)) ) \
+ if ( unlikely(shadow_mode_enabled(current->domain)) ) \
{ \
- shadow2_validate_guest_entry(current, _mfn(_m), (_p)); \
- shadow2_unlock(current->domain); \
+ shadow_validate_guest_entry(current, _mfn(_m), (_p)); \
+ shadow_unlock(current->domain); \
} \
rv; \
})
*/
this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
- if ( unlikely(shadow2_mode_enabled(owner)
- && !shadow2_lock_is_acquired(owner)) )
+ if ( unlikely(shadow_mode_enabled(owner)
+ && !shadow_lock_is_acquired(owner)) )
{
/* Raw page tables are rewritten during save/restore. */
- if ( !shadow2_mode_translate(owner) )
+ if ( !shadow_mode_translate(owner) )
mark_dirty(owner, page_to_mfn(page));
- if ( shadow2_mode_refcounts(owner) )
+ if ( shadow_mode_refcounts(owner) )
return;
gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
ASSERT(VALID_M2P(gmfn));
- shadow2_lock(owner);
- shadow2_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
- shadow2_unlock(owner);
+ shadow_lock(owner);
+ shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
+ shadow_unlock(owner);
}
}
#endif
/* Fixme: add code to propagate va_unknown to subtables. */
if ( ((type & PGT_type_mask) >= PGT_l2_page_table) &&
- !shadow2_mode_refcounts(page_get_owner(page)) )
+ !shadow_mode_refcounts(page_get_owner(page)) )
return 0;
/* This table is possibly mapped at multiple locations. */
nx &= ~PGT_va_mask;
if ( hvm_guest(v) && !hvm_paging_enabled(v) )
domain_crash_synchronous();
- if ( shadow2_mode_refcounts(d) )
+ if ( shadow_mode_refcounts(d) )
{
okay = get_page_from_pagenr(mfn, d);
if ( unlikely(!okay) )
if ( likely(old_base_mfn != 0) )
{
- if ( shadow2_mode_refcounts(d) )
+ if ( shadow_mode_refcounts(d) )
put_page(mfn_to_page(old_base_mfn));
else
put_page_and_type(mfn_to_page(old_base_mfn));
type = PGT_root_page_table;
pin_page:
- if ( shadow2_mode_refcounts(FOREIGNDOM) )
+ if ( shadow_mode_refcounts(FOREIGNDOM) )
break;
okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
break;
case MMUEXT_UNPIN_TABLE:
- if ( shadow2_mode_refcounts(d) )
+ if ( shadow_mode_refcounts(d) )
break;
if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
{
put_page_and_type(page);
put_page(page);
- if ( shadow2_mode_enabled(d) )
+ if ( shadow_mode_enabled(d) )
{
- shadow2_lock(d);
- shadow2_remove_all_shadows(v, _mfn(mfn));
- shadow2_unlock(d);
+ shadow_lock(d);
+ shadow_remove_all_shadows(v, _mfn(mfn));
+ shadow_unlock(d);
}
}
else
break;
case MMUEXT_INVLPG_LOCAL:
- if ( !shadow2_mode_enabled(d)
- || shadow2_invlpg(v, op.arg1.linear_addr) != 0 )
+ if ( !shadow_mode_enabled(d)
+ || shadow_invlpg(v, op.arg1.linear_addr) != 0 )
local_flush_tlb_one(op.arg1.linear_addr);
break;
unsigned long ptr = op.arg1.linear_addr;
unsigned long ents = op.arg2.nr_ents;
- if ( shadow2_mode_external(d) )
+ if ( shadow_mode_external(d) )
{
MEM_LOG("ignoring SET_LDT hypercall from external "
"domain %u", d->domain_id);
case PGT_l3_page_table:
case PGT_l4_page_table:
{
- if ( shadow2_mode_refcounts(d) )
+ if ( shadow_mode_refcounts(d) )
{
DPRINTK("mmu update on shadow-refcounted domain!");
break;
if ( unlikely(!get_page_type(page, PGT_writable_page)) )
break;
- if ( unlikely(shadow2_mode_enabled(d)) )
- shadow2_lock(d);
+ if ( unlikely(shadow_mode_enabled(d)) )
+ shadow_lock(d);
*(intpte_t *)va = req.val;
okay = 1;
- if ( unlikely(shadow2_mode_enabled(d)) )
+ if ( unlikely(shadow_mode_enabled(d)) )
{
- shadow2_validate_guest_entry(v, _mfn(mfn), va);
- shadow2_unlock(d);
+ shadow_validate_guest_entry(v, _mfn(mfn), va);
+ shadow_unlock(d);
}
put_page_type(page);
break;
}
- if ( shadow2_mode_translate(FOREIGNDOM) )
- shadow2_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
+ if ( shadow_mode_translate(FOREIGNDOM) )
+ shadow_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
else
set_gpfn_from_mfn(mfn, gpfn);
okay = 1;
goto failed;
}
- if ( !shadow2_mode_refcounts(d) )
+ if ( !shadow_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
put_page_type(page);
l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
return GNTST_general_error;
- if ( !shadow2_mode_refcounts(d) )
+ if ( !shadow_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
return GNTST_okay;
perfc_incrc(calls_to_update_va);
- if ( unlikely(!__addr_ok(va) && !shadow2_mode_external(d)) )
+ if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
return -EINVAL;
- if ( unlikely(shadow2_mode_refcounts(d)) )
+ if ( unlikely(shadow_mode_refcounts(d)) )
{
DPRINTK("Grant op on a shadow-refcounted domain\n");
return -EINVAL;
LOCK_BIGLOCK(d);
- if ( likely(rc == 0) && unlikely(shadow2_mode_enabled(d)) )
+ if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
{
if ( unlikely(this_cpu(percpu_mm_info).foreign &&
- (shadow2_mode_translate(d) ||
- shadow2_mode_translate(
+ (shadow_mode_translate(d) ||
+ shadow_mode_translate(
this_cpu(percpu_mm_info).foreign))) )
{
/*
switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
{
case UVMF_LOCAL:
- if ( !shadow2_mode_enabled(d)
- || (shadow2_invlpg(current, va) != 0) )
+ if ( !shadow_mode_enabled(d)
+ || (shadow_invlpg(current, va) != 0) )
local_flush_tlb_one(va);
break;
case UVMF_ALL:
break;
}
- if ( !shadow2_mode_translate(d) || (mfn == 0) )
+ if ( !shadow_mode_translate(d) || (mfn == 0) )
{
put_domain(d);
return -EINVAL;
pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
if ( do_cmpxchg )
{
- if ( shadow2_mode_enabled(d) )
- shadow2_lock(d);
+ if ( shadow_mode_enabled(d) )
+ shadow_lock(d);
ol1e = l1e_from_intpte(old);
if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
{
- if ( shadow2_mode_enabled(d) )
- shadow2_unlock(d);
+ if ( shadow_mode_enabled(d) )
+ shadow_unlock(d);
unmap_domain_page(pl1e);
put_page_from_l1e(nl1e, d);
return X86EMUL_CMPXCHG_FAILED;
}
- if ( unlikely(shadow2_mode_enabled(v->domain)) )
+ if ( unlikely(shadow_mode_enabled(v->domain)) )
{
- shadow2_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
- shadow2_unlock(v->domain);
+ shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
+ shadow_unlock(v->domain);
}
}
else
--- /dev/null
+subdir-y += shadow
--- /dev/null
+# Build rules for the shadow pagetable code.  common.o is compiled once;
+# multi.c is compiled once per supported (guest-levels, shadow-levels)
+# pair, with the pair encoded in the object name as g<G>_on_s<S>.o.
+ifneq ($(pae),n)
+obj-$(x86_32) += common.o g2_on_s3.o g3_on_s3.o
+else
+obj-$(x86_32) += common.o g2_on_s2.o
+endif
+
+obj-$(x86_64) += common.o g4_on_s4.o g3_on_s3.o g2_on_s3.o
+
+# Extract the numeric paging levels from an object name like "g2_on_s3.o".
+guest_levels = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(1)))))
+shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(1)))))
+shadow_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
+ -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
+
+# Pattern rule: every g%.o target is multi.c built with the level
+# defines derived from its own filename.
+g%.o: multi.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) $(call shadow_defns,$(@F)) -c $< -o $@
--- /dev/null
+/******************************************************************************
+ * arch/x86/mm/shadow/common.c
+ *
+ * Shadow code that does not need to be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define SHADOW 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shadow.h>
+#include "private.h"
+
+#if SHADOW_AUDIT
+int shadow_audit_enable = 0;
+
+/* Debug keyhandler: toggle the global shadow-audit flag and report the
+ * new state on the console. */
+static void shadow_audit_key(unsigned char key)
+{
+ shadow_audit_enable = !shadow_audit_enable;
+ printk("%s shadow_audit_enable=%d\n",
+ __func__, shadow_audit_enable);
+}
+
+/* Register the 'O' console key at boot so shadow audits can be toggled
+ * at run time. */
+static int __init shadow_audit_key_init(void)
+{
+ register_keyhandler(
+ 'O', shadow_audit_key, "toggle shadow audits");
+ return 0;
+}
+__initcall(shadow_audit_key_init);
+#endif /* SHADOW_AUDIT */
+
+static void sh_free_log_dirty_bitmap(struct domain *d);
+
+/* Out-of-line wrapper for the shadow_mode_refcounts() predicate, for
+ * callers that cannot use the inline/macro form directly. */
+int _shadow_mode_refcounts(struct domain *d)
+{
+ return shadow_mode_refcounts(d);
+}
+
+
+/**************************************************************************/
+/* x86 emulator support for the shadow code
+ */
+
+/* Emulator callback: read 'bytes' bytes of guest memory at virtual
+ * address 'addr' into *val.  Only implemented for HVM vcpus (via
+ * hvm_copy()); other callers get X86EMUL_UNHANDLEABLE. */
+static int
+sh_x86_emulate_read_std(unsigned long addr,
+ unsigned long *val,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct vcpu *v = current;
+ if ( hvm_guest(v) )
+ {
+ *val = 0;
+ // XXX -- this is WRONG.
+ // It entirely ignores the permissions in the page tables.
+ // In this case, that is only a user vs supervisor access check.
+ //
+ if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
+ {
+#if 0
+ SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+ v->domain->domain_id, v->vcpu_id,
+ addr, *val, bytes);
+#endif
+ return X86EMUL_CONTINUE;
+ }
+
+ /* If we got here, there was nothing mapped here, or a bad GFN
+ * was mapped here. This should never happen: we're here because
+ * of a write fault at the end of the instruction we're emulating. */
+ SHADOW_PRINTK("read failed to va %#lx\n", addr);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ else
+ {
+ SHADOW_PRINTK("this operation is not emulated yet\n");
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+/* Emulator callback: write 'bytes' bytes of 'val' to guest virtual
+ * address 'addr'.  Only implemented for HVM vcpus (via hvm_copy());
+ * other callers get X86EMUL_UNHANDLEABLE. */
+static int
+sh_x86_emulate_write_std(unsigned long addr,
+ unsigned long val,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct vcpu *v = current;
+#if 0
+ SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+ v->domain->domain_id, v->vcpu_id, addr, val, bytes);
+#endif
+ if ( hvm_guest(v) )
+ {
+ // XXX -- this is WRONG.
+ // It entirely ignores the permissions in the page tables.
+ // In this case, that includes user vs supervisor, and
+ // write access.
+ //
+ if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
+ return X86EMUL_CONTINUE;
+
+ /* If we got here, there was nothing mapped here, or a bad GFN
+ * was mapped here. This should never happen: we're here because
+ * of a write fault at the end of the instruction we're emulating,
+ * which should be handled by sh_x86_emulate_write_emulated. */
+ SHADOW_PRINTK("write failed to va %#lx\n", addr);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ else
+ {
+ SHADOW_PRINTK("this operation is not emulated yet\n");
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+/* Emulator callback for writes that must go through the shadow code
+ * (e.g. writes to shadowed pagetables): dispatch to the per-paging-mode
+ * x86_emulate_write handler.  HVM only. */
+static int
+sh_x86_emulate_write_emulated(unsigned long addr,
+ unsigned long val,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct vcpu *v = current;
+#if 0
+ SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+ v->domain->domain_id, v->vcpu_id, addr, val, bytes);
+#endif
+ if ( hvm_guest(v) )
+ {
+ return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
+ }
+ else
+ {
+ SHADOW_PRINTK("this operation is not emulated yet\n");
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+/* Emulator callback for CMPXCHG on shadowed memory: dispatch to the
+ * per-paging-mode x86_emulate_cmpxchg handler.  HVM only. */
+static int
+sh_x86_emulate_cmpxchg_emulated(unsigned long addr,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct vcpu *v = current;
+#if 0
+ SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
+ v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
+#endif
+ if ( hvm_guest(v) )
+ {
+ return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new,
+ bytes, ctxt);
+ }
+ else
+ {
+ SHADOW_PRINTK("this operation is not emulated yet\n");
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+/* Emulator callback for CMPXCHG8B on shadowed memory: dispatch to the
+ * per-paging-mode x86_emulate_cmpxchg8b handler.  HVM only. */
+static int
+sh_x86_emulate_cmpxchg8b_emulated(unsigned long addr,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct vcpu *v = current;
+ /* NOTE(review): the disabled SHADOW_PRINTK below passes 'ctxt' as an
+ * extra vararg with no matching format specifier -- harmless while
+ * under #if 0, but should be dropped if the trace is re-enabled. */
+#if 0
+ SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx:%lx n:=%#lx:%lx\n",
+ v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
+ new_hi, new_lo, ctxt);
+#endif
+ if ( hvm_guest(v) )
+ {
+ return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
+ new_lo, new_hi, ctxt);
+ }
+ else
+ {
+ SHADOW_PRINTK("this operation is not emulated yet\n");
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+
+/* Callback table handed to the x86 emulator when the shadow code needs
+ * to emulate a guest instruction.  Note read_emulated aliases read_std:
+ * emulated reads need no special shadow handling. */
+struct x86_emulate_ops shadow_emulator_ops = {
+ .read_std = sh_x86_emulate_read_std,
+ .write_std = sh_x86_emulate_write_std,
+ .read_emulated = sh_x86_emulate_read_std,
+ .write_emulated = sh_x86_emulate_write_emulated,
+ .cmpxchg_emulated = sh_x86_emulate_cmpxchg_emulated,
+ .cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated,
+};
+
+
+/**************************************************************************/
+/* Code for "promoting" a guest page to the point where the shadow code is
+ * willing to let it be treated as a guest page table. This generally
+ * involves making sure there are no writable mappings available to the guest
+ * for this page.
+ */
+/* Promote gmfn to a guest pagetable page of the given shadow 'type':
+ * mark it PGC_page_table, take a type ref the first time it is
+ * shadowed, and record 'type' in its shadow_flags. */
+void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type)
+{
+ struct page_info *page = mfn_to_page(gmfn);
+ unsigned long type_info;
+
+ ASSERT(valid_mfn(gmfn));
+
+ /* We should never try to promote a gmfn that has writeable mappings */
+ /* NOTE(review): this ASSERT wraps a side-effecting call
+ * (shadow_remove_write_access); in builds where ASSERT compiles
+ * away, the call is not made -- presumably the caller has already
+ * removed write access, but confirm. */
+ ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
+
+ // Is the page already shadowed?
+ if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
+ {
+ // No prior shadow exists...
+
+ // Grab a type-ref. We don't really care if we are racing with another
+ // vcpu or not, or even what kind of type we get; we just want the type
+ // count to be > 0.
+ //
+ do {
+ type_info =
+ page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask);
+ } while ( !get_page_type(page, type_info) );
+
+ // Now that the type ref is non-zero, we can safely use the
+ // shadow_flags.
+ //
+ page->shadow_flags = 0;
+ }
+
+ ASSERT(!test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
+ set_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
+
+/* Undo a shadow_promote() of the given 'type': clear the type's bit in
+ * shadow_flags, and when no shadow types remain, drop the extra type
+ * ref and the PGC_page_table marker. */
+void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
+{
+ struct page_info *page = mfn_to_page(gmfn);
+
+ ASSERT(test_bit(_PGC_page_table, &page->count_info));
+ ASSERT(test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
+
+ clear_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
+
+ if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
+ {
+ // release the extra type ref
+ put_page_type(page);
+
+ // clear the is-a-page-table bit.
+ clear_bit(_PGC_page_table, &page->count_info);
+ }
+}
+
+/**************************************************************************/
+/* Validate a pagetable change from the guest and update the shadows.
+ * Returns a bitmask of SHADOW_SET_* flags. */
+
+/* Core of validation: after a guest write of 'size' bytes at 'entry'
+ * within pagetable page gmfn, revalidate every shadow of that page by
+ * dispatching to the per-(guest,shadow)-level validators recorded in
+ * the page's shadow_flags.  Returns an OR of SHADOW_SET_* flags. */
+static int
+__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size)
+{
+ int result = 0;
+ struct page_info *page = mfn_to_page(gmfn);
+
+ sh_mark_dirty(v->domain, gmfn);
+
+ // Determine which types of shadows are affected, and update each.
+ //
+ // Always validate L1s before L2s to prevent another cpu with a linear
+ // mapping of this gmfn from seeing a walk that results from
+ // using the new L2 value and the old L1 value. (It is OK for such a
+ // guest to see a walk that uses the old L2 value with the new L1 value,
+ // as hardware could behave this way if one level of the pagewalk occurs
+ // before the store, and the next level of the pagewalk occurs after the
+ // store.
+ //
+ // Ditto for L2s before L3s, etc.
+ //
+
+ if ( !(page->count_info & PGC_page_table) )
+ return 0; /* Not shadowed at all */
+
+#if CONFIG_PAGING_LEVELS == 2
+ if ( page->shadow_flags & SHF_L1_32 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 2, 2)
+ (v, gmfn, entry, size);
+#else
+ if ( page->shadow_flags & SHF_L1_32 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 2)
+ (v, gmfn, entry, size);
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+ if ( page->shadow_flags & SHF_L2_32 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 2, 2)
+ (v, gmfn, entry, size);
+#else
+ if ( page->shadow_flags & SHF_L2_32 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 2)
+ (v, gmfn, entry, size);
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+ if ( page->shadow_flags & SHF_L1_PAE )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 3)
+ (v, gmfn, entry, size);
+ if ( page->shadow_flags & SHF_L2_PAE )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 3)
+ (v, gmfn, entry, size);
+ if ( page->shadow_flags & SHF_L2H_PAE )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
+ (v, gmfn, entry, size);
+ if ( page->shadow_flags & SHF_L3_PAE )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
+ (v, gmfn, entry, size);
+#else /* 32-bit non-PAE hypervisor does not support PAE guests */
+ ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+ if ( page->shadow_flags & SHF_L1_64 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 4, 4)
+ (v, gmfn, entry, size);
+ if ( page->shadow_flags & SHF_L2_64 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 4, 4)
+ (v, gmfn, entry, size);
+ if ( page->shadow_flags & SHF_L3_64 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 4, 4)
+ (v, gmfn, entry, size);
+ if ( page->shadow_flags & SHF_L4_64 )
+ result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, 4, 4)
+ (v, gmfn, entry, size);
+#else /* 32-bit/PAE hypervisor does not support 64-bit guests */
+ ASSERT((page->shadow_flags
+ & (SHF_L4_64|SHF_L3_64|SHF_L2_64|SHF_L1_64)) == 0);
+#endif
+
+ return result;
+}
+
+
+int
+shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
+/* This is the entry point from hypercalls. It returns a bitmask of all the
+ * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
+{
+ int rc;
+
+ /* Caller must already hold the per-domain shadow lock. */
+ ASSERT(shadow_lock_is_acquired(v->domain));
+ /* Validate a single entry's worth of bytes at 'entry' in gmfn. */
+ rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
+ shadow_audit_tables(v);
+ return rc;
+}
+
+void
+shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size)
+/* This is the entry point for emulated writes to pagetables in HVM guests.
+ * 'entry' points at the bytes just written into the guest page gmfn;
+ * 'size' is the length of that write in bytes. Caller holds the shadow
+ * lock. */
+{
+ struct domain *d = v->domain;
+ int rc;
+
+ ASSERT(shadow_lock_is_acquired(v->domain));
+ rc = __shadow_validate_guest_entry(v, gmfn, entry, size);
+ if ( rc & SHADOW_SET_FLUSH )
+ {
+ // Flush everyone except the local processor, which will flush when it
+ // re-enters the HVM guest.
+ //
+ cpumask_t mask = d->domain_dirty_cpumask;
+ cpu_clear(v->processor, mask);
+ flush_tlb_mask(mask);
+ }
+ if ( rc & SHADOW_SET_ERROR )
+ {
+ /* This page is probably not a pagetable any more: tear it out of the
+ * shadows, along with any tables that reference it */
+ shadow_remove_all_shadows_and_parents(v, gmfn);
+ }
+ /* We ignore the other bits: since we are about to change CR3 on
+ * VMENTER we don't need to do any extra TLB flushes. */
+}
+
+
+/**************************************************************************/
+/* Memory management for shadow pages. */
+
+/* Meaning of the count_info field in shadow pages
+ * ----------------------------------------------
+ *
+ * A count of all references to this page from other shadow pages and
+ * guest CR3s (a.k.a. v->arch.shadow.table).
+ *
+ * The top bits hold the shadow type and the pinned bit. Top-level
+ * shadows are pinned so that they don't disappear when not in a CR3
+ * somewhere.
+ *
+ * We don't need to use get|put_page for this as the updates are all
+ * protected by the shadow lock. We can't use get|put_page for this
+ * as the size of the count on shadow pages is different from that on
+ * normal guest pages.
+ */
+
+/* Meaning of the type_info field in shadow pages
+ * ----------------------------------------------
+ *
+ * type_info use depends on the shadow type (from count_info)
+ *
+ * PGC_SH_none : This page is in the shadow free pool. type_info holds
+ * the chunk order for our freelist allocator.
+ *
+ * PGC_SH_l*_shadow : This page is in use as a shadow. type_info
+ * holds the mfn of the guest page being shadowed,
+ *
+ * PGC_SH_fl1_*_shadow : This page is being used to shatter a superpage.
+ * type_info holds the gfn being shattered.
+ *
+ * PGC_SH_monitor_table : This page is part of a monitor table.
+ * type_info is not used.
+ */
+
+/* Meaning of the _domain field in shadow pages
+ * --------------------------------------------
+ *
+ * In shadow pages, this field will always have its least significant bit
+ * set. This ensures that all attempts to get_page() will fail (as all
+ * valid pickled domain pointers have a zero for their least significant bit).
+ * Instead, the remaining upper bits are used to record the shadow generation
+ * counter when the shadow was created.
+ */
+
+/* Meaning of the shadow_flags field
+ * ----------------------------------
+ *
+ * In guest pages that are shadowed, one bit for each kind of shadow they have.
+ *
+ * In shadow pages, will be used for holding a representation of the populated
+ * entries in this shadow (either a min/max, or a bitmap, or ...)
+ *
+ * In monitor-table pages, holds the level of the particular page (to save
+ * spilling the shadow types into an extra bit by having three types of monitor
+ * page).
+ */
+
+/* Meaning of the list_head struct in shadow pages
+ * -----------------------------------------------
+ *
+ * In free shadow pages, this is used to hold the free-lists of chunks.
+ *
+ * In top-level shadow tables, this holds a linked-list of all top-level
+ * shadows (used for recovering memory and destroying shadows).
+ *
+ * In lower-level shadows, this holds the physical address of a higher-level
+ * shadow entry that holds a reference to this shadow (or zero).
+ */
+
+/* Allocating shadow pages
+ * -----------------------
+ *
+ * Most shadow pages are allocated singly, but there are two cases where we
+ * need to allocate multiple pages together.
+ *
+ * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
+ * A 32-bit guest l1 table covers 4MB of virtual address space,
+ * and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
+ * of virtual address space each). Similarly, a 32-bit guest l2 table
+ * (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va
+ * each). These multi-page shadows are contiguous and aligned;
+ * functions for handling offsets into them are defined in shadow.c
+ * (shadow_l1_index() etc.)
+ *
+ * 2: Shadowing PAE top-level pages. Each guest page that contains
+ * any PAE top-level pages requires two shadow pages to shadow it.
+ * They contain alternating l3 tables and pae_l3_bookkeeping structs.
+ *
+ * This table shows the allocation behaviour of the different modes:
+ *
+ * Xen paging 32b pae pae 64b 64b 64b
+ * Guest paging 32b 32b pae 32b pae 64b
+ * PV or HVM * HVM * HVM HVM *
+ * Shadow paging 32b pae pae pae pae 64b
+ *
+ * sl1 size 4k 8k 4k 8k 4k 4k
+ * sl2 size 4k 16k 4k 16k 4k 4k
+ * sl3 size - - 8k - 8k 4k
+ * sl4 size - - - - - 4k
+ *
+ * We allocate memory from xen in four-page units and break them down
+ * with a simple buddy allocator. Can't use the xen allocator to handle
+ * this as it only works for contiguous zones, and a domain's shadow
+ * pool is made of fragments.
+ *
+ * In HVM guests, the p2m table is built out of shadow pages, and we provide
+ * a function for the p2m management to steal pages, in max-order chunks, from
+ * the free pool. We don't provide for giving them back, yet.
+ */
+
+/* Figure out the least acceptable quantity of shadow memory.
+ * The minimum memory requirement for always being able to free up a
+ * chunk of memory is very small -- only three max-order chunks per
+ * vcpu to hold the top level shadows and pages with Xen mappings in them.
+ *
+ * But for a guest to be guaranteed to successfully execute a single
+ * instruction, we must be able to map a large number (about thirty) VAs
+ * at the same time, which means that to guarantee progress, we must
+ * allow for more than ninety allocated pages per vcpu. We round that
+ * up to 128 pages, or half a megabyte per vcpu. */
+/* Return the minimum acceptable shadow pool size for domain d:
+ * 128 pages (half a megabyte) per vcpu, per the analysis in the
+ * comment block above. */
+unsigned int shadow_min_acceptable_pages(struct domain *d)
+{
+ u32 vcpu_count = 0;
+ struct vcpu *v;
+
+ for_each_vcpu(d, v)
+ vcpu_count++;
+
+ return (vcpu_count * 128);
+}
+
+/* Using the type_info field to store freelist order */
+#define SH_PFN_ORDER(_p) ((_p)->u.inuse.type_info)
+#define SH_SET_PFN_ORDER(_p, _o) \
+ do { (_p)->u.inuse.type_info = (_o); } while (0)
+
+
+/* Figure out the order of allocation needed for a given shadow type */
+static inline u32
+shadow_order(u32 shadow_type)
+{
+#if CONFIG_PAGING_LEVELS > 2
+ /* Multi-page orders follow the size table in the "Allocating shadow
+ * pages" comment above: 32-bit guest l1s need 2 pages (order 1),
+ * 32-bit guest l2s need 4 pages (order 2), PAE l3s are paired with
+ * bookkeeping pages (order 1), and the p2m steals max-order chunks. */
+ static const u32 type_to_order[16] = {
+ 0, /* PGC_SH_none */
+ 1, /* PGC_SH_l1_32_shadow */
+ 1, /* PGC_SH_fl1_32_shadow */
+ 2, /* PGC_SH_l2_32_shadow */
+ 0, /* PGC_SH_l1_pae_shadow */
+ 0, /* PGC_SH_fl1_pae_shadow */
+ 0, /* PGC_SH_l2_pae_shadow */
+ 0, /* PGC_SH_l2h_pae_shadow */
+ 1, /* PGC_SH_l3_pae_shadow */
+ 0, /* PGC_SH_l1_64_shadow */
+ 0, /* PGC_SH_fl1_64_shadow */
+ 0, /* PGC_SH_l2_64_shadow */
+ 0, /* PGC_SH_l3_64_shadow */
+ 0, /* PGC_SH_l4_64_shadow */
+ 2, /* PGC_SH_p2m_table */
+ 0 /* PGC_SH_monitor_table */
+ };
+ u32 type = (shadow_type & PGC_SH_type_mask) >> PGC_SH_type_shift;
+ return type_to_order[type];
+#else /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
+ return 0;
+#endif
+}
+
+
+/* Do we have a free chunk of at least this order?
+ * (A larger chunk can always be split down, so any order >= the
+ * requested one satisfies the request.) */
+static inline int chunk_is_available(struct domain *d, int order)
+{
+ int i;
+
+ for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
+ if ( !list_empty(&d->arch.shadow.freelists[i]) )
+ return 1;
+ return 0;
+}
+
+/* Dispatcher function: call the per-mode function that will unhook the
+ * non-Xen mappings in this top-level shadow mfn */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
+{
+ struct page_info *pg = mfn_to_page(smfn);
+ /* Dispatch on the shadow type stored in the top bits of count_info. */
+ switch ( (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift )
+ {
+ case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+#if CONFIG_PAGING_LEVELS == 2
+ SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,2,2)(v,smfn);
+#else
+ SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,3,2)(v,smfn);
+#endif
+ break;
+#if CONFIG_PAGING_LEVELS >= 3
+ case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+ SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
+ break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+ case PGC_SH_l4_64_shadow >> PGC_SH_type_shift:
+ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings,4,4)(v,smfn);
+ break;
+#endif
+ default:
+ /* Only top-level shadow types are legal here. */
+ SHADOW_PRINTK("top-level shadow has bad type %08lx\n",
+ (unsigned long)((pg->count_info & PGC_SH_type_mask)
+ >> PGC_SH_type_shift));
+ BUG();
+ }
+}
+
+
+/* Make sure there is at least one chunk of the required order available
+ * in the shadow page pool. This must be called before any calls to
+ * shadow_alloc(). Since this will free existing shadows to make room,
+ * it must be called early enough to avoid freeing shadows that the
+ * caller is currently working on. */
+void shadow_prealloc(struct domain *d, unsigned int order)
+{
+ /* Need a vcpu for calling unpins; for now, since we don't have
+ * per-vcpu shadows, any will do */
+ struct vcpu *v = d->vcpu[0];
+ struct list_head *l, *t;
+ struct page_info *pg;
+ mfn_t smfn;
+
+ if ( chunk_is_available(d, order) ) return;
+
+ /* Stage one: walk the list of top-level pages, unpinning them */
+ perfc_incrc(shadow_prealloc_1);
+ list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+ {
+ pg = list_entry(l, struct page_info, list);
+ smfn = page_to_mfn(pg);
+
+#if CONFIG_PAGING_LEVELS >= 3
+ if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
+ {
+ /* For PAE, we need to unpin each subshadow on this shadow */
+ SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
+ }
+ else
+#endif /* 32-bit code always takes this branch */
+ {
+ /* Unpin this top-level shadow */
+ sh_unpin(v, smfn);
+ }
+
+ /* See if that freed up a chunk of appropriate size */
+ if ( chunk_is_available(d, order) ) return;
+ }
+
+ /* Stage two: all shadow pages are in use in hierarchies that are
+ * loaded in cr3 on some vcpu. Walk them, unhooking the non-Xen
+ * mappings. */
+ perfc_incrc(shadow_prealloc_2);
+ /* Prefer the current vcpu so the TLB-flush check below is meaningful. */
+ v = current;
+ if ( v->domain != d )
+ v = d->vcpu[0];
+ /* Walk the list from the tail: recently used toplevels have been pulled
+ * to the head */
+ list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+ {
+ pg = list_entry(l, struct page_info, list);
+ smfn = page_to_mfn(pg);
+ shadow_unhook_mappings(v, smfn);
+
+ /* Need to flush TLB if we've altered our own tables */
+ if ( !shadow_mode_external(d)
+ && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
+ local_flush_tlb();
+
+ /* See if that freed up a chunk of appropriate size */
+ if ( chunk_is_available(d, order) ) return;
+ }
+
+ /* Nothing more we can do: all remaining shadows are of pages that
+ * hold Xen mappings for some vcpu. This should never happen. */
+ SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n"
+ " shadow pages total = %u, free = %u, p2m=%u\n",
+ 1 << order,
+ d->arch.shadow.total_pages,
+ d->arch.shadow.free_pages,
+ d->arch.shadow.p2m_pages);
+ BUG();
+}
+
+
+/* Allocate another shadow's worth of (contiguous, aligned) pages,
+ * and fill in the type and backpointer fields of their page_infos.
+ * Never fails to allocate (BUGs instead): shadow_prealloc() must have
+ * been called first to guarantee a suitable chunk exists. */
+mfn_t shadow_alloc(struct domain *d,
+ u32 shadow_type,
+ unsigned long backpointer)
+{
+ struct page_info *pg = NULL;
+ unsigned int order = shadow_order(shadow_type);
+ cpumask_t mask;
+ void *p;
+ int i;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(order <= SHADOW_MAX_ORDER);
+ ASSERT(shadow_type != PGC_SH_none);
+ perfc_incrc(shadow_alloc);
+
+ /* Find smallest order which can satisfy the request. */
+ for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
+ if ( !list_empty(&d->arch.shadow.freelists[i]) )
+ {
+ pg = list_entry(d->arch.shadow.freelists[i].next,
+ struct page_info, list);
+ list_del(&pg->list);
+
+ /* We may have to halve the chunk a number of times.
+ * (Buddy split: return the upper half to the next-lower
+ * freelist each time round.) */
+ while ( i != order )
+ {
+ i--;
+ SH_SET_PFN_ORDER(pg, i);
+ list_add_tail(&pg->list, &d->arch.shadow.freelists[i]);
+ pg += 1 << i;
+ }
+ d->arch.shadow.free_pages -= 1 << order;
+
+ /* Init page info fields and clear the pages */
+ for ( i = 0; i < 1<<order ; i++ )
+ {
+ pg[i].u.inuse.type_info = backpointer;
+ pg[i].count_info = shadow_type;
+ pg[i].shadow_flags = 0;
+ INIT_LIST_HEAD(&pg[i].list);
+ /* Before we overwrite the old contents of this page,
+ * we need to be sure that no TLB holds a pointer to it. */
+ mask = d->domain_dirty_cpumask;
+ tlbflush_filter(mask, pg[i].tlbflush_timestamp);
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(shadow_alloc_tlbflush);
+ flush_tlb_mask(mask);
+ }
+ /* Now safe to clear the page for reuse */
+ p = sh_map_domain_page(page_to_mfn(pg+i));
+ ASSERT(p != NULL);
+ clear_page(p);
+ sh_unmap_domain_page(p);
+ perfc_incr(shadow_alloc_count);
+ }
+ return page_to_mfn(pg);
+ }
+
+ /* If we get here, we failed to allocate. This should never happen.
+ * It means that we didn't call shadow_prealloc() correctly before
+ * we allocated. We can't recover by calling prealloc here, because
+ * we might free up higher-level pages that the caller is working on. */
+ SHADOW_PRINTK("Can't allocate %i shadow pages!\n", 1 << order);
+ BUG();
+}
+
+
+/* Return some shadow pages to the pool, coalescing buddies back into
+ * larger chunks where possible. Caller holds the shadow lock. */
+void shadow_free(struct domain *d, mfn_t smfn)
+{
+ struct page_info *pg = mfn_to_page(smfn);
+ u32 shadow_type;
+ unsigned long order;
+ unsigned long mask;
+ int i;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ perfc_incrc(shadow_free);
+
+ shadow_type = pg->count_info & PGC_SH_type_mask;
+ ASSERT(shadow_type != PGC_SH_none);
+ ASSERT(shadow_type != PGC_SH_p2m_table);
+ order = shadow_order(shadow_type);
+
+ d->arch.shadow.free_pages += 1 << order;
+
+ for ( i = 0; i < 1<<order; i++ )
+ {
+ /* Strip out the type: this is now a free shadow page */
+ pg[i].count_info = 0;
+ /* Remember the TLB timestamp so we will know whether to flush
+ * TLBs when we reuse the page. Because the destructors leave the
+ * contents of the pages in place, we can delay TLB flushes until
+ * just before the allocator hands the page out again. */
+ pg[i].tlbflush_timestamp = tlbflush_current_time();
+ perfc_decr(shadow_alloc_count);
+ }
+
+ /* Merge chunks as far as possible. */
+ while ( order < SHADOW_MAX_ORDER )
+ {
+ mask = 1 << order;
+ /* NOTE(review): these comparisons test a PGC_SH_type_mask field
+ * against PGT_none — presumably both constants are zero so this
+ * works, but confirm this should not read PGC_SH_none. */
+ if ( (mfn_x(page_to_mfn(pg)) & mask) ) {
+ /* Merge with predecessor block? */
+ if ( (((pg-mask)->count_info & PGC_SH_type_mask) != PGT_none)
+ || (SH_PFN_ORDER(pg-mask) != order) )
+ break;
+ list_del(&(pg-mask)->list);
+ pg -= mask;
+ } else {
+ /* Merge with successor block? */
+ if ( (((pg+mask)->count_info & PGC_SH_type_mask) != PGT_none)
+ || (SH_PFN_ORDER(pg+mask) != order) )
+ break;
+ list_del(&(pg+mask)->list);
+ }
+ order++;
+ }
+
+ SH_SET_PFN_ORDER(pg, order);
+ list_add_tail(&pg->list, &d->arch.shadow.freelists[order]);
+}
+
+/* Divert some memory from the pool to be used by the p2m mapping.
+ * This action is irreversible: the p2m mapping only ever grows.
+ * That's OK because the p2m table only exists for external domains,
+ * and those domains can't ever turn off shadow mode.
+ * Also, we only ever allocate a max-order chunk, so as to preserve
+ * the invariant that shadow_prealloc() always works.
+ * Returns 0 iff it can't get a chunk (the caller should then
+ * free up some pages in domheap and call set_sh_allocation);
+ * returns non-zero on success.
+ */
+static int
+shadow_alloc_p2m_pages(struct domain *d)
+{
+ struct page_info *pg;
+ u32 i;
+ ASSERT(shadow_lock_is_acquired(d));
+
+ if ( d->arch.shadow.total_pages
+ < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
+ return 0; /* Not enough shadow memory: need to increase it first */
+
+ /* Move one max-order chunk from the shadow pool to the p2m freelist. */
+ pg = mfn_to_page(shadow_alloc(d, PGC_SH_p2m_table, 0));
+ d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
+ d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
+ for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
+ {
+ /* Unlike shadow pages, mark p2m pages as owned by the domain */
+ page_set_owner(&pg[i], d);
+ list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist);
+ }
+ return 1;
+}
+
+// Allocate and zero one page for the p2m table, refilling the p2m
+// freelist from the shadow pool if it is empty.
+// Returns _mfn(0) if no memory is available.
+mfn_t
+shadow_alloc_p2m_page(struct domain *d)
+{
+ struct list_head *entry;
+ mfn_t mfn;
+ void *p;
+
+ if ( list_empty(&d->arch.shadow.p2m_freelist) &&
+ !shadow_alloc_p2m_pages(d) )
+ return _mfn(0);
+ entry = d->arch.shadow.p2m_freelist.next;
+ list_del(entry);
+ list_add_tail(entry, &d->arch.shadow.p2m_inuse);
+ mfn = page_to_mfn(list_entry(entry, struct page_info, list));
+ /* Take the single reference that p2m_teardown expects to find. */
+ sh_get_ref(mfn, 0);
+ p = sh_map_domain_page(mfn);
+ clear_page(p);
+ sh_unmap_domain_page(p);
+
+ return mfn;
+}
+
+#if CONFIG_PAGING_LEVELS == 3
+static void p2m_install_entry_in_monitors(struct domain *d,
+ l3_pgentry_t *l3e)
+/* Special case, only used for external-mode domains on PAE hosts:
+ * update the mapping of the p2m table. Once again, this is trivial in
+ * other paging modes (one top-level entry points to the top-level p2m,
+ * no maintenance needed), but PAE makes life difficult by needing a
+ * copy the eight l3es of the p2m table in eight l2h slots in the
+ * monitor table. This function makes fresh copies when a p2m l3e
+ * changes. */
+{
+ l2_pgentry_t *ml2e;
+ struct vcpu *v;
+ unsigned int index;
+
+ /* Which of the p2m's l3 entries changed? (derived from the entry's
+ * offset within its page) */
+ index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
+ ASSERT(index < MACHPHYS_MBYTES>>1);
+
+ for_each_vcpu(d, v)
+ {
+ if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
+ continue;
+ ASSERT(shadow_mode_external(v->domain));
+
+ SHADOW_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
+ d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
+
+ if ( v == current ) /* OK to use linear map of monitor_table */
+ ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
+ else
+ {
+ /* Walk the vcpu's monitor table by hand to find its l2h. */
+ l3_pgentry_t *ml3e;
+ ml3e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+ ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
+ ml2e = sh_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
+ ml2e += l2_table_offset(RO_MPT_VIRT_START);
+ sh_unmap_domain_page(ml3e);
+ }
+ ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
+ if ( v != current )
+ sh_unmap_domain_page(ml2e);
+ }
+}
+#endif
+
+// Find the next level's P2M entry, checking for out-of-range gfn's...
+// On success, masks the consumed bits out of *gfn_remainder.
+// Returns NULL on error.
+//
+static l1_pgentry_t *
+p2m_find_entry(void *table, unsigned long *gfn_remainder,
+ unsigned long gfn, u32 shift, u32 max)
+{
+ u32 index;
+
+ index = *gfn_remainder >> shift;
+ if ( index >= max )
+ {
+ SHADOW_DEBUG(P2M, "gfn=0x%lx out of range "
+ "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
+ gfn, *gfn_remainder, shift, index, max);
+ return NULL;
+ }
+ *gfn_remainder &= (1 << shift) - 1;
+ return (l1_pgentry_t *)table + index;
+}
+
+// Walk one level of the P2M table, allocating a new table if required.
+// On success, *table is remapped to the next level down (the previous
+// mapping is dropped) and *table_mfn is updated to match.
+// Returns 0 on error.
+//
+static int
+p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
+ unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
+ u32 max, unsigned long type)
+{
+ l1_pgentry_t *p2m_entry;
+ void *next;
+
+ if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
+ shift, max)) )
+ return 0;
+
+ if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
+ {
+ mfn_t mfn = shadow_alloc_p2m_page(d);
+ if ( mfn_x(mfn) == 0 )
+ return 0;
+ *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+ mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated;
+ mfn_to_page(mfn)->count_info = 1;
+#if CONFIG_PAGING_LEVELS == 3
+ if (type == PGT_l2_page_table)
+ {
+ /* We have written to the p2m l3: need to sync the per-vcpu
+ * copies of it in the monitor tables */
+ p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
+ }
+#endif
+ /* The P2M can be shadowed: keep the shadows synced */
+ if ( d->vcpu[0] )
+ (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn,
+ p2m_entry, sizeof *p2m_entry);
+ }
+ *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
+ next = sh_map_domain_page(*table_mfn);
+ sh_unmap_domain_page(*table);
+ *table = next;
+
+ return 1;
+}
+
+// Set the p2m entry for gfn to mfn (or clear it if mfn is invalid).
+// Walks (and grows, if needed) the p2m pagetable under the shadow lock.
+// Returns 0 on error (out of memory).
+int
+shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+ // XXX -- this might be able to be faster iff current->domain == d
+ mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
+ void *table = sh_map_domain_page(table_mfn);
+ unsigned long gfn_remainder = gfn;
+ l1_pgentry_t *p2m_entry;
+ int rv = 0;
+
+#if CONFIG_PAGING_LEVELS >= 4
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L4_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
+ goto out;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+ // When using PAE Xen, we only allow 33 bits of pseudo-physical
+ // address in translated guests (i.e. 8 GBytes). This restriction
+ // comes from wanting to map the P2M table into the 16MB RO_MPT hole
+ // in Xen's address space for translated PV guests.
+ //
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+ (CONFIG_PAGING_LEVELS == 3
+ ? 8
+ : L3_PAGETABLE_ENTRIES),
+ PGT_l2_page_table) )
+ goto out;
+#endif
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+ if ( valid_mfn(mfn) )
+ *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+ else
+ *p2m_entry = l1e_empty();
+
+ /* The P2M can be shadowed: keep the shadows synced.
+ * (Guard vcpu[0] as p2m_next_level does: it may not exist yet.) */
+ if ( d->vcpu[0] )
+ (void) __shadow_validate_guest_entry(d->vcpu[0], table_mfn,
+ p2m_entry, sizeof *p2m_entry);
+ rv = 1;
+
+ out:
+ /* Always drop the mapping of the last-walked level; the early error
+ * returns previously leaked this sh_map_domain_page() mapping. */
+ sh_unmap_domain_page(table);
+ return rv;
+}
+
+// Allocate a new p2m table for a domain.
+//
+// The structure of the p2m table is that of a pagetable for xen (i.e. it is
+// controlled by CONFIG_PAGING_LEVELS).
+//
+// Returns 0 if p2m table could not be initialized
+//
+static int
+shadow_alloc_p2m_table(struct domain *d)
+{
+ mfn_t p2m_top;
+ struct list_head *entry;
+ unsigned int page_count = 0;
+
+ SHADOW_PRINTK("allocating p2m table\n");
+ ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0);
+
+ p2m_top = shadow_alloc_p2m_page(d);
+ /* Check for allocation failure *before* touching the page_info:
+ * shadow_alloc_p2m_page() returns _mfn(0) when out of memory, and
+ * the previous code dereferenced mfn_to_page(p2m_top) first. */
+ if ( mfn_x(p2m_top) == 0 )
+ return 0;
+ mfn_to_page(p2m_top)->count_info = 1;
+ mfn_to_page(p2m_top)->u.inuse.type_info =
+#if CONFIG_PAGING_LEVELS == 4
+ PGT_l4_page_table
+#elif CONFIG_PAGING_LEVELS == 3
+ PGT_l3_page_table
+#elif CONFIG_PAGING_LEVELS == 2
+ PGT_l2_page_table
+#endif
+ | 1 | PGT_validated;
+
+ d->arch.phys_table = pagetable_from_mfn(p2m_top);
+
+ SHADOW_PRINTK("populating p2m table\n");
+
+ /* Add a p2m entry for every page the domain currently owns. */
+ for ( entry = d->page_list.next;
+ entry != &d->page_list;
+ entry = entry->next )
+ {
+ struct page_info *page = list_entry(entry, struct page_info, list);
+ mfn_t mfn = page_to_mfn(page);
+ unsigned long gfn = get_gpfn_from_mfn(mfn_x(mfn));
+ page_count++;
+ if (
+ /* Skip pages with no valid m2p entry (0x55.. is the poison
+ * pattern the m2p table is initialised with). */
+#ifdef __x86_64__
+ (gfn != 0x5555555555555555L)
+#else
+ (gfn != 0x55555555L)
+#endif
+ && gfn != INVALID_M2P_ENTRY
+ && !shadow_set_p2m_entry(d, gfn, mfn) )
+ {
+ SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" SH_PRI_mfn "\n",
+ gfn, mfn_x(mfn));
+ return 0;
+ }
+ }
+
+ SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count);
+ return 1;
+}
+
+mfn_t
+sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+/* Read another domain's p2m entries. Walks the p2m pagetable by hand,
+ * one level at a time, returning _mfn(INVALID_MFN) if any level is
+ * absent or the gpfn is out of range. */
+{
+ mfn_t mfn;
+ unsigned long addr = gpfn << PAGE_SHIFT;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ ASSERT(shadow_mode_translate(d));
+ mfn = pagetable_get_mfn(d->arch.phys_table);
+
+
+#if CONFIG_PAGING_LEVELS > 2
+ if ( gpfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) )
+ /* This pfn is higher than the p2m map can hold */
+ return _mfn(INVALID_MFN);
+#endif
+
+
+#if CONFIG_PAGING_LEVELS >= 4
+ {
+ l4_pgentry_t *l4e = sh_map_domain_page(mfn);
+ l4e += l4_table_offset(addr);
+ if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
+ {
+ sh_unmap_domain_page(l4e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l4e_get_pfn(*l4e));
+ sh_unmap_domain_page(l4e);
+ }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+ {
+ l3_pgentry_t *l3e = sh_map_domain_page(mfn);
+ l3e += l3_table_offset(addr);
+ if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+ {
+ sh_unmap_domain_page(l3e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l3e_get_pfn(*l3e));
+ sh_unmap_domain_page(l3e);
+ }
+#endif
+
+ l2e = sh_map_domain_page(mfn);
+ l2e += l2_table_offset(addr);
+ if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
+ {
+ sh_unmap_domain_page(l2e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l2e_get_pfn(*l2e));
+ sh_unmap_domain_page(l2e);
+
+ l1e = sh_map_domain_page(mfn);
+ l1e += l1_table_offset(addr);
+ if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
+ {
+ sh_unmap_domain_page(l1e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l1e_get_pfn(*l1e));
+ sh_unmap_domain_page(l1e);
+
+ return mfn;
+}
+
+/* Convenience wrapper: gfn->mfn lookup for the current domain,
+ * returned as a raw unsigned long rather than an mfn_t. */
+unsigned long
+shadow_gfn_to_mfn_foreign(unsigned long gpfn)
+{
+ return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn));
+}
+
+
+static void shadow_p2m_teardown(struct domain *d)
+/* Return all the p2m pages to Xen.
+ * We know we don't have any extra mappings to these pages */
+{
+ struct list_head *entry, *n;
+ struct page_info *pg;
+
+ d->arch.phys_table = pagetable_null();
+
+ /* First the pages that are in use as p2m tables... */
+ list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse)
+ {
+ pg = list_entry(entry, struct page_info, list);
+ list_del(entry);
+ /* Should have just the one ref we gave it in alloc_p2m_page() */
+ if ( (pg->count_info & PGC_SH_count_mask) != 1 )
+ {
+ SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ pg->count_info, pg->u.inuse.type_info);
+ }
+ ASSERT(page_get_owner(pg) == d);
+ /* Free should not decrement domain's total allocation, since
+ * these pages were allocated without an owner. */
+ page_set_owner(pg, NULL);
+ free_domheap_pages(pg, 0);
+ d->arch.shadow.p2m_pages--;
+ perfc_decr(shadow_alloc_count);
+ }
+ /* ...then the pages still sitting on the p2m freelist. */
+ list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist)
+ {
+ list_del(entry);
+ pg = list_entry(entry, struct page_info, list);
+ ASSERT(page_get_owner(pg) == d);
+ /* Free should not decrement domain's total allocation. */
+ page_set_owner(pg, NULL);
+ free_domheap_pages(pg, 0);
+ d->arch.shadow.p2m_pages--;
+ perfc_decr(shadow_alloc_count);
+ }
+ ASSERT(d->arch.shadow.p2m_pages == 0);
+}
+
+/* Set the pool of shadow pages to the required number of pages.
+ * Input will be rounded up to at least shadow_min_acceptable_pages(),
+ * plus space for the p2m table.
+ * Returns 0 for success, non-zero for failure.
+ * NOTE(review): the return type is unsigned int but the failure path
+ * returns -ENOMEM, and preemption also returns 0 (with *preempted set)
+ * — callers must treat 0-with-preemption as "retry", not success;
+ * confirm this is the intended contract. */
+static unsigned int set_sh_allocation(struct domain *d,
+ unsigned int pages,
+ int *preempted)
+{
+ struct page_info *pg;
+ unsigned int lower_bound;
+ int j;
+
+ ASSERT(shadow_lock_is_acquired(d));
+
+ /* Don't allocate less than the minimum acceptable, plus one page per
+ * megabyte of RAM (for the p2m table) */
+ lower_bound = shadow_min_acceptable_pages(d) + (d->tot_pages / 256);
+ if ( pages > 0 && pages < lower_bound )
+ pages = lower_bound;
+ /* Round up to largest block size */
+ pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1);
+
+ SHADOW_PRINTK("current %i target %i\n",
+ d->arch.shadow.total_pages, pages);
+
+ /* Grow or shrink one max-order chunk at a time until we hit target. */
+ while ( d->arch.shadow.total_pages != pages )
+ {
+ if ( d->arch.shadow.total_pages < pages )
+ {
+ /* Need to allocate more memory from domheap */
+ pg = alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0);
+ if ( pg == NULL )
+ {
+ SHADOW_PRINTK("failed to allocate shadow pages.\n");
+ return -ENOMEM;
+ }
+ d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
+ d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
+ for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ )
+ {
+ pg[j].u.inuse.type_info = 0; /* Free page */
+ pg[j].tlbflush_timestamp = 0; /* Not in any TLB */
+ }
+ SH_SET_PFN_ORDER(pg, SHADOW_MAX_ORDER);
+ list_add_tail(&pg->list,
+ &d->arch.shadow.freelists[SHADOW_MAX_ORDER]);
+ }
+ else if ( d->arch.shadow.total_pages > pages )
+ {
+ /* Need to return memory to domheap */
+ shadow_prealloc(d, SHADOW_MAX_ORDER);
+ ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER]));
+ pg = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next,
+ struct page_info, list);
+ list_del(&pg->list);
+ d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
+ d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
+ free_domheap_pages(pg, SHADOW_MAX_ORDER);
+ }
+
+ /* Check to see if we need to yield and try again */
+ if ( preempted && hypercall_preempt_check() )
+ {
+ *preempted = 1;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+unsigned int shadow_set_allocation(struct domain *d,
+ unsigned int megabytes,
+ int *preempted)
+/* Hypercall interface to set the shadow memory allocation.
+ * Takes the shadow lock around set_sh_allocation(); megabytes is
+ * converted to pages before the call. */
+{
+ unsigned int rv;
+ shadow_lock(d);
+ rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted);
+ SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n",
+ d->domain_id,
+ d->arch.shadow.total_pages,
+ shadow_get_allocation(d));
+ shadow_unlock(d);
+ return rv;
+}
+
+/**************************************************************************/
+/* Hash table for storing the guest->shadow mappings */
+
+/* Hash function that takes a gfn or mfn, plus another byte of type info */
+typedef u32 key_t;
+static inline key_t sh_hash(unsigned long n, u8 t)
+{
+ unsigned char *p = (unsigned char *)&n;
+ key_t k = t;
+ int i;
+ /* sdbm-style string hash folded over the bytes of n, seeded with t:
+ * each step is k = k*65599 + byte (65599 == (1<<6)+(1<<16)-1). */
+ for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
+ return k;
+}
+
+#if SHADOW_AUDIT & (SHADOW_AUDIT_HASH|SHADOW_AUDIT_HASH_FULL)
+
+/* Before we get to the mechanism, define a pair of audit functions
+ * that sanity-check the contents of the hash table. */
+static void sh_hash_audit_bucket(struct domain *d, int bucket)
+/* Audit one bucket of the hash table: BUGs on any structural
+ * inconsistency between the hash entries and the shadow page_infos. */
+{
+ struct shadow_hash_entry *e, *x;
+ struct page_info *pg;
+
+ if ( !(SHADOW_AUDIT_ENABLE) )
+ return;
+
+ e = &d->arch.shadow.hash_table[bucket];
+ if ( e->t == 0 ) return; /* Bucket is empty */
+ while ( e )
+ {
+ /* Empty link? */
+ BUG_ON( e->t == 0 );
+ /* Bogus type? */
+ BUG_ON( e->t > (PGC_SH_max_shadow >> PGC_SH_type_shift) );
+ /* Wrong bucket? */
+ BUG_ON( sh_hash(e->n, e->t) % SHADOW_HASH_BUCKETS != bucket );
+ /* Duplicate entry? */
+ for ( x = e->next; x; x = x->next )
+ BUG_ON( x->n == e->n && x->t == e->t );
+ /* Bogus MFN? */
+ BUG_ON( !valid_mfn(e->smfn) );
+ pg = mfn_to_page(e->smfn);
+ /* Not a shadow? (shadow pages have no owner) */
+ BUG_ON( page_get_owner(pg) != 0 );
+ /* Wrong kind of shadow? */
+ BUG_ON( (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift
+ != e->t );
+ /* Bad backlink? */
+ BUG_ON( pg->u.inuse.type_info != e->n );
+ /* fl1 shadows back onto a gfn, not a guest page, so the
+ * shadow_flags check below does not apply to them. */
+ if ( e->t != (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+ && e->t != (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+ && e->t != (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift) )
+ {
+ /* Bad shadow flags on guest page? */
+ BUG_ON( !(mfn_to_page(_mfn(e->n))->shadow_flags & (1<<e->t)) );
+ }
+ /* That entry was OK; on we go */
+ e = e->next;
+ }
+}
+
+#else
+#define sh_hash_audit_bucket(_d, _b)
+#endif /* Hashtable bucket audit */
+
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_HASH_FULL
+
+static void sh_hash_audit(struct domain *d)
+/* Full audit: audit every bucket in the table */
+{
+ int i;
+
+ if ( !(SHADOW_AUDIT_ENABLE) )
+ return;
+
+ for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
+ {
+ sh_hash_audit_bucket(d, i);
+ }
+}
+
+#else
+#define sh_hash_audit(_d)
+#endif /* Hashtable bucket audit */
+
+/* Memory management interface for bucket allocation.
+ * These ought to come out of shadow memory, but at least on 32-bit
+ * machines we are forced to allocate them from xenheap so that we can
+ * address them. */
+static struct shadow_hash_entry *sh_alloc_hash_entry(struct domain *d)
+{
+ struct shadow_hash_entry *extra, *x;
+ int i;
+
+ /* We need to allocate a new node. Ensure the free list is not empty.
+ * Allocate new entries in units the same size as the original table. */
+ if ( unlikely(d->arch.shadow.hash_freelist == NULL) )
+ {
+ /* Block layout: SHADOW_HASH_BUCKETS entries followed by one
+ * pointer linking this block into the hash_allocations chain. */
+ size_t sz = sizeof(void *) + (SHADOW_HASH_BUCKETS * sizeof(*x));
+ extra = xmalloc_bytes(sz);
+
+ if ( extra == NULL )
+ {
+ /* No memory left! */
+ SHADOW_ERROR("xmalloc() failed when allocating hash buckets.\n");
+ domain_crash_synchronous();
+ }
+ memset(extra, 0, sz);
+
+ /* Record the allocation block so it can be correctly freed later. */
+ *((struct shadow_hash_entry **)&extra[SHADOW_HASH_BUCKETS]) =
+ d->arch.shadow.hash_allocations;
+ d->arch.shadow.hash_allocations = &extra[0];
+
+ /* Thread a free chain through the newly-allocated nodes. */
+ for ( i = 0; i < (SHADOW_HASH_BUCKETS - 1); i++ )
+ extra[i].next = &extra[i+1];
+ extra[i].next = NULL;
+
+ /* Add the new nodes to the free list. */
+ d->arch.shadow.hash_freelist = &extra[0];
+ }
+
+ /* Allocate a new node from the free list. */
+ x = d->arch.shadow.hash_freelist;
+ d->arch.shadow.hash_freelist = x->next;
+ return x;
+}
+
+static void sh_free_hash_entry(struct domain *d, struct shadow_hash_entry *e)
+/* Return one chained hash entry to the per-domain free list.  The
+ * backing memory is only given back to Xen in shadow_hash_teardown(). */
+{
+    /* Mark the bucket as empty and return it to the free list */
+    e->t = 0; 
+    e->next = d->arch.shadow.hash_freelist;
+    d->arch.shadow.hash_freelist = e;
+}
+
+
+/* Allocate and initialise the table itself.  
+ * Returns 0 for success, 1 for error. */
+static int shadow_hash_alloc(struct domain *d)
+{
+    struct shadow_hash_entry *table;
+
+    /* Caller must hold the shadow lock and not have a table already. */
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(!d->arch.shadow.hash_table);
+
+    table = xmalloc_array(struct shadow_hash_entry, SHADOW_HASH_BUCKETS);
+    if ( !table ) return 1;
+    /* Zeroing sets every head entry's t to 0, i.e. all buckets empty. */
+    memset(table, 0, 
+           SHADOW_HASH_BUCKETS * sizeof (struct shadow_hash_entry));
+    d->arch.shadow.hash_table = table;
+    return 0;
+}
+
+/* Tear down the hash table and return all memory to Xen.
+ * This function does not care whether the table is populated. */
+static void shadow_hash_teardown(struct domain *d)
+{
+    struct shadow_hash_entry *a, *n;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+
+    /* Return the table itself */
+    xfree(d->arch.shadow.hash_table);
+    d->arch.shadow.hash_table = NULL;
+
+    /* Return any extra allocations (batches made by sh_alloc_hash_entry) */
+    a = d->arch.shadow.hash_allocations;
+    while ( a ) 
+    {
+        /* We stored a linked-list pointer at the end of each allocation */
+        n = *((struct shadow_hash_entry **)(&a[SHADOW_HASH_BUCKETS]));
+        xfree(a);
+        a = n;
+    }
+    /* Free-list nodes all lived inside the blocks just freed. */
+    d->arch.shadow.hash_allocations = NULL;
+    d->arch.shadow.hash_freelist = NULL;
+}
+
+
+mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, u8 t)
+/* Find an entry in the hash table.  Returns the MFN of the shadow,
+ * or INVALID_MFN if it doesn't exist.
+ * On a hit in the middle of a chain, moves the entry to the front
+ * (by content-swap with the embedded head) -- unless someone is
+ * currently walking the chains, in which case no reordering is done. */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *p, *x, *head;
+    key_t key;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_lookups);
+    key = sh_hash(n, t);
+
+    x = head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+    p = NULL;
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    do
+    {
+        /* An empty head entry is only legal if it has no chain behind it. */
+        ASSERT(x->t || ((x == head) && (x->next == NULL)));
+
+        if ( x->n == n && x->t == t )
+        {
+            /* Pull-to-front if 'x' isn't already the head item */
+            if ( unlikely(x != head) )
+            {
+                if ( unlikely(d->arch.shadow.hash_walking != 0) )
+                    /* Can't reorder: someone is walking the hash chains */
+                    return x->smfn;
+                else 
+                {
+                    /* Delete 'x' from list and reinsert after head. */
+                    p->next = x->next;
+                    x->next = head->next;
+                    head->next = x;
+                    
+                    /* Swap 'x' contents with head contents.  (The head
+                     * entry is embedded in the table, so it can't move.) */
+                    SWAP(head->n, x->n);
+                    SWAP(head->t, x->t);
+                    SWAP(head->smfn, x->smfn);
+                }
+            }
+            else
+            {
+                perfc_incrc(shadow_hash_lookup_head);
+            }
+            return head->smfn;
+        }
+
+        p = x;
+        x = x->next;
+    }
+    while ( x != NULL );
+
+    perfc_incrc(shadow_hash_lookup_miss);
+    return _mfn(INVALID_MFN);
+}
+
+void shadow_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
+/* Put a mapping (n,t)->smfn into the hash table.
+ * The caller must hold the shadow lock; (n,t) must not already be
+ * present (no duplicate check is made here -- the bucket audit would
+ * catch one). */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *x, *head;
+    key_t key;
+    
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_inserts);
+    key = sh_hash(n, t);
+
+    head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    /* If the bucket is empty then insert the new page as the head item. */
+    if ( head->t == 0 )
+    {
+        head->n = n;
+        head->t = t;
+        head->smfn = smfn;
+        ASSERT(head->next == NULL);
+    }
+    else 
+    {
+        /* Insert a new entry directly after the head item. */
+        x = sh_alloc_hash_entry(d);
+        x->n = n; 
+        x->t = t;
+        x->smfn = smfn;
+        x->next = head->next;
+        head->next = x;
+    }
+    
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+}
+
+void shadow_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
+/* Excise the mapping (n,t)->smfn from the hash table.
+ * The mapping MUST be present: the chain walk BUGs (via ASSERT) if it
+ * runs off the end without finding it. */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *p, *x, *head;
+    key_t key;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_deletes);
+    key = sh_hash(n, t);
+
+    head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    /* Match on head item? */
+    if ( head->n == n && head->t == t )
+    {
+        if ( (x = head->next) != NULL )
+        {
+            /* Overwrite head with contents of following node.  (The head
+             * is embedded in the table and cannot itself be freed.) */
+            head->n = x->n;
+            head->t = x->t;
+            head->smfn = x->smfn;
+
+            /* Delete following node. */
+            head->next = x->next;
+            sh_free_hash_entry(d, x);
+        }
+        else
+        {
+            /* This bucket is now empty. Initialise the head node. */
+            head->t = 0;
+        }
+    }
+    else 
+    {
+        /* Not at the head; need to walk the chain */
+        p = head;
+        x = head->next; 
+        
+        while(1)
+        {
+            ASSERT(x); /* We can't have hit the end, since our target is
+                        * still in the chain somewhere... */
+            if ( x->n == n && x->t == t )
+            {
+                /* Delete matching node. */
+                p->next = x->next;
+                sh_free_hash_entry(d, x);
+                break;
+            }
+            p = x;
+            x = x->next;
+        }
+    }
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+}
+
+typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
+
+static void hash_foreach(struct vcpu *v, 
+                         unsigned int callback_mask, 
+                         hash_callback_t callbacks[], 
+                         mfn_t callback_mfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0;
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *x;
+    
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_walking == 0);
+    d->arch.shadow.hash_walking = 1;
+
+    /* Bit 0 corresponds to type 0, which marks empty head entries. */
+    callback_mask &= ~1; /* Never attempt to call back on empty buckets */
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = &d->arch.shadow.hash_table[i]; x; x = x->next )
+        {
+            if ( callback_mask & (1 << x->t) ) 
+            {
+                ASSERT(x->t <= 15);
+                ASSERT(callbacks[x->t] != NULL);
+                if ( (done = callbacks[x->t](v, x->smfn, callback_mfn)) != 0 )
+                    break;
+            }
+        }
+        if ( done ) break; 
+    }
+    d->arch.shadow.hash_walking = 0; 
+}
+
+
+/**************************************************************************/
+/* Destroy a shadow page: simple dispatcher to call the per-type destructor
+ * which will decrement refcounts appropriately and return memory to the 
+ * free pool. */
+
+void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    /* Shadow type is encoded in the page's count_info. */
+    u32 t = pg->count_info & PGC_SH_type_mask;
+
+
+    SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
+
+    /* Double-check, if we can, that the shadowed page belongs to this
+     * domain, (by following the back-pointer).  fl1 shadows and monitor
+     * tables carry no usable back-pointer, so they are exempted. */
+    ASSERT(t == PGC_SH_fl1_32_shadow  ||  
+           t == PGC_SH_fl1_pae_shadow ||  
+           t == PGC_SH_fl1_64_shadow  || 
+           t == PGC_SH_monitor_table  || 
+           (page_get_owner(mfn_to_page(_mfn(pg->u.inuse.type_info))) 
+            == v->domain)); 
+
+    /* The down-shifts here are so that the switch statement is on nice
+     * small numbers that the compiler will enjoy */
+    switch ( t >> PGC_SH_type_shift )
+    {
+#if CONFIG_PAGING_LEVELS == 2
+    case PGC_SH_l1_32_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 2, 2)(v, smfn); 
+        break;
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 2, 2)(v, smfn);
+        break;
+#else /* PAE or 64bit */
+    case PGC_SH_l1_32_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 2)(v, smfn);
+        break;
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 2)(v, smfn);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    case PGC_SH_l1_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 3)(v, smfn);
+        break;
+    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
+        break;
+    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l1_64_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l2_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l3_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l4_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, 4, 4)(v, smfn);
+        break;
+#endif
+    default:
+        SHADOW_PRINTK("tried to destroy shadow of bad type %08lx\n", 
+                       (unsigned long)t);
+        BUG();
+    }    
+}
+
+/**************************************************************************/
+/* Remove all writeable mappings of a guest frame from the shadow tables 
+ * Returns non-zero if we need to flush TLBs. 
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access.*/
+
+int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, 
+                               unsigned int level,
+                               unsigned long fault_addr)
+{
+    /* Dispatch table for getting per-type functions.  Indexed by shadow
+     * type (PGC_SH_* >> PGC_SH_type_shift); only l1/fl1 shadows can hold
+     * writeable guest mappings, so only those slots are populated. */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32  */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32  */
+#endif
+        NULL, /* l2_32   */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */
+#else 
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#endif
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    static unsigned int callback_mask = 
+          1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_64_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift)
+        ;
+    struct page_info *pg = mfn_to_page(gmfn);
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    /* Only remove writable mappings if we are doing shadow refcounts.
+     * In guest refcounting, we trust Xen to already be restricting
+     * all the writes to the guest page tables, so we do not need to
+     * do more. */
+    if ( !shadow_mode_refcounts(v->domain) )
+        return 0;
+
+    /* Early exit if it's already a pagetable, or otherwise not writeable */
+    if ( sh_mfn_is_a_page_table(gmfn) 
+         || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 0;
+
+    perfc_incrc(shadow_writeable);
+
+    /* If this isn't a "normal" writeable page, the domain is trying to 
+     * put pagetables in special memory of some kind.  We can't allow that. */
+    if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
+    {
+        SHADOW_ERROR("can't remove write access to mfn %lx, type_info is %" 
+                      PRtype_info "\n",
+                      mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
+        domain_crash(v->domain);
+    }
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    if ( v == current && level != 0 )
+    {
+        unsigned long gfn;
+        /* Heuristic: there is likely to be only one writeable mapping,
+         * and that mapping is likely to be in the current pagetable,
+         * either in the guest's linear map (linux, windows) or in a
+         * magic slot used to map high memory regions (linux HIGHTPTE) */
+
+        /* Try one guessed virtual address; if the writeable-mapping count
+         * has dropped to zero afterwards, we are done and need a flush. */
+#define GUESS(_a, _h) do {                                              \
+            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )      \
+                perfc_incrc(shadow_writeable_h_ ## _h);                \
+            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
+                return 1;                                               \
+        } while (0)
+
+
+        /* Linux lowmem: first 1GB is mapped 1-to-1 above 0xC0000000 */
+        if ( v == current 
+             && (gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x40000000 )
+            GUESS(0xC0000000 + (gfn << PAGE_SHIFT), 4);
+
+        if ( v->arch.shadow.mode->guest_levels == 2 )
+        {
+            if ( level == 1 )
+                /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
+                GUESS(0xC0000000UL + (fault_addr >> 10), 1);
+        }
+#if CONFIG_PAGING_LEVELS >= 3
+        else if ( v->arch.shadow.mode->guest_levels == 3 )
+        {
+            /* 32bit PAE w2k3: linear map at 0xC0000000 */
+            switch ( level ) 
+            {
+            case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
+            case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
+            }
+        }
+#if CONFIG_PAGING_LEVELS >= 4
+        else if ( v->arch.shadow.mode->guest_levels == 4 )
+        {
+            /* 64bit w2k3: linear map at 0x0000070000000000 */
+            switch ( level ) 
+            {
+            case 1: GUESS(0x70000000000UL + (fault_addr >> 9), 3); break;
+            case 2: GUESS(0x70380000000UL + (fault_addr >> 18), 3); break;
+            case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break;
+            }
+        }
+#endif /* CONFIG_PAGING_LEVELS >= 4 */
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
+
+#undef GUESS
+
+    }
+#endif
+    
+    /* Brute-force search of all the shadows, by walking the hash */
+    perfc_incrc(shadow_writeable_bf);
+    hash_foreach(v, callback_mask, callbacks, gmfn);
+
+    /* If that didn't catch the mapping, something is very wrong */
+    if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
+    {
+        SHADOW_ERROR("can't find all writeable mappings of mfn %lx: "
+                      "%lu left\n", mfn_x(gmfn),
+                      (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
+        domain_crash(v->domain);
+    }
+    
+    /* We killed at least one writeable mapping, so must flush TLBs. */
+    return 1;
+}
+
+
+
+/**************************************************************************/
+/* Remove all mappings of a guest frame from the shadow tables.
+ * Returns non-zero if we need to flush TLBs. */
+
+int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+    int expected_count;
+
+    /* Dispatch table for getting per-type functions.  Indexed by shadow
+     * type; only l1/fl1 shadows can hold guest mappings, so only those
+     * slots are populated. */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32  */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32  */
+#endif
+        NULL, /* l2_32   */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */
+#else 
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#endif
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    static unsigned int callback_mask = 
+          1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_64_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift)
+        ;
+
+    perfc_incrc(shadow_mappings);
+    if ( (page->count_info & PGC_count_mask) == 0 )
+        return 0;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    /* XXX TODO: 
+     * Heuristics for finding the (probably) single mapping of this gmfn */
+    
+    /* Brute-force search of all the shadows, by walking the hash */
+    perfc_incrc(shadow_mappings_bf);
+    hash_foreach(v, callback_mask, callbacks, gmfn);
+
+    /* If that didn't catch the mapping, something is very wrong */
+    /* A page that is still allocated keeps one general reference. */
+    expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
+    if ( (page->count_info & PGC_count_mask) != expected_count )
+    {
+        /* Don't complain if we're in HVM and there's one extra mapping: 
+         * The qemu helper process has an untyped mapping of this dom's RAM */
+        if ( !(shadow_mode_external(v->domain)
+               && (page->count_info & PGC_count_mask) <= 2
+               && (page->u.inuse.type_info & PGT_count_mask) == 0) )
+        {
+            SHADOW_ERROR("can't find all mappings of mfn %lx: "
+                          "c=%08x t=%08lx\n", mfn_x(gmfn), 
+                          page->count_info, page->u.inuse.type_info);
+        }
+    }
+
+    /* We killed at least one mapping, so must flush TLBs. */
+    return 1;
+}
+
+
+/**************************************************************************/
+/* Remove all shadows of a guest frame from the shadow tables */
+
+static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
+/* Follow this shadow's up-pointer, if it has one, and remove the reference
+ * found there.  Returns 1 if that was the only reference to this shadow */
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    mfn_t pmfn;
+    void *vaddr;
+    int rc;
+
+    /* Must be a real shadow, and not a top-level one (top-level shadows
+     * have no parent table, so no up-pointer to follow). */
+    ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
+    ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
+    
+    if (pg->up == 0) return 0;
+    /* pg->up packs the parent shadow's MFN with the byte offset of the
+     * entry that points at us. */
+    pmfn = _mfn(pg->up >> PAGE_SHIFT);
+    ASSERT(valid_mfn(pmfn));
+    vaddr = sh_map_domain_page(pmfn);
+    ASSERT(vaddr);
+    vaddr += pg->up & (PAGE_SIZE-1);
+    ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
+    
+    /* Is this the only reference to this shadow? */
+    rc = ((pg->count_info & PGC_SH_count_mask) == 1) ? 1 : 0;
+
+    /* Blank the offending entry */
+    switch ((pg->count_info & PGC_SH_type_mask)) 
+    {
+    case PGC_SH_l1_32_shadow:
+    case PGC_SH_l2_32_shadow:
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,2,2)(v, vaddr, pmfn);
+#else
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,2)(v, vaddr, pmfn);
+#endif
+        break;
+#if CONFIG_PAGING_LEVELS >=3
+    case PGC_SH_l1_pae_shadow:
+    case PGC_SH_l2_pae_shadow:
+    case PGC_SH_l2h_pae_shadow:
+    case PGC_SH_l3_pae_shadow:
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
+        break;
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l1_64_shadow:
+    case PGC_SH_l2_64_shadow:
+    case PGC_SH_l3_64_shadow:
+    case PGC_SH_l4_64_shadow:
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,4,4)(v, vaddr, pmfn);
+        break;
+#endif
+#endif
+    default: BUG(); /* Some weird unknown shadow type */
+    }
+    
+    sh_unmap_domain_page(vaddr);
+    if ( rc )
+        perfc_incrc(shadow_up_pointer);
+    else
+        perfc_incrc(shadow_unshadow_bf);
+
+    return rc;
+}
+
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
+/* Remove the shadows of this guest page.  
+ * If all != 0, find all shadows, if necessary by walking the tables.
+ * Otherwise, just try the (much faster) heuristics, which will remove 
+ * at most one reference to each shadow of the page. */
+{
+    struct page_info *pg;
+    mfn_t smfn;
+    u32 sh_flags;
+    unsigned char t;
+
+    /* Dispatch table for getting per-type functions: each level must
+     * be called with the function to remove a lower-level shadow. */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+        NULL, /* l1_32   */
+        NULL, /* fl1_32  */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,2,2), /* l2_32   */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,2), /* l2_32   */
+#endif
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
+        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
+#else 
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#endif
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,4,4), /* l3_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_l3_shadow,4,4), /* l4_64   */
+#else
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+#endif
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    /* Another lookup table, for choosing which mask to use: for each
+     * shadow type, the set of parent shadow types that can point at it. */
+    static unsigned int masks[16] = {
+        0, /* none    */
+        1 << (PGC_SH_l2_32_shadow >> PGC_SH_type_shift), /* l1_32   */
+        0, /* fl1_32  */
+        0, /* l2_32   */
+        ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
+         | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
+        0, /* fl1_pae */
+        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
+        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae */
+        0, /* l3_pae  */
+        1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
+        0, /* fl1_64  */
+        1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
+        1 << (PGC_SH_l4_64_shadow >> PGC_SH_type_shift), /* l3_64   */
+        0, /* l4_64   */
+        0, /* p2m     */
+        0  /* unused  */
+    };
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    pg = mfn_to_page(gmfn);
+
+    /* Bail out now if the page is not shadowed */
+    if ( (pg->count_info & PGC_page_table) == 0 )
+        return;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+                   v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
+
+    /* Search for this shadow in all appropriate shadows */
+    perfc_incrc(shadow_unshadow);
+    sh_flags = pg->shadow_flags;
+
+    /* Lower-level shadows need to be excised from upper-level shadows.
+     * This call to hash_foreach() looks dangerous but is in fact OK: each
+     * call will remove at most one shadow, and terminate immediately when
+     * it does remove it, so we never walk the hash after doing a deletion.  */
+#define DO_UNSHADOW(_type) do {                                 \
+    t = (_type) >> PGC_SH_type_shift;                          \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);               \
+    if ( !sh_remove_shadow_via_pointer(v, smfn) && all )        \
+        hash_foreach(v, masks[t], callbacks, smfn);             \
+} while (0)
+
+    /* Top-level shadows need to be unpinned */
+#define DO_UNPIN(_type) do {                                             \
+    t = (_type) >> PGC_SH_type_shift;                                   \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                        \
+    if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                 \
+        sh_unpin(v, smfn);                                               \
+    if ( (_type) == PGC_SH_l3_pae_shadow )                               \
+        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);   \
+} while (0)
+
+    if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
+    if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
+    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
+#if CONFIG_PAGING_LEVELS >= 4
+    if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
+    if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
+    if ( sh_flags & SHF_L3_64 )   DO_UNSHADOW(PGC_SH_l3_64_shadow);
+    if ( sh_flags & SHF_L4_64 )   DO_UNPIN(PGC_SH_l4_64_shadow);
+#endif
+#endif
+
+#undef DO_UNSHADOW
+#undef DO_UNPIN
+
+
+#if CONFIG_PAGING_LEVELS > 2
+    /* We may have caused some PAE l3 entries to change: need to 
+     * fix up the copies of them in various places */
+    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
+        sh_pae_recopy(v->domain);
+#endif
+
+    /* If that didn't catch the shadows, something is wrong */
+    if ( all && (pg->count_info & PGC_page_table) )
+    {
+        SHADOW_ERROR("can't find all shadows of mfn %05lx (shadow_flags=%08x)\n",
+                      mfn_x(gmfn), pg->shadow_flags);
+        domain_crash(v->domain);
+    }
+}
+
+void
+shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
+/* Even harsher: this is a HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+{
+    shadow_remove_all_shadows(v, gmfn);
+    /* XXX TODO:
+     * Rework this hashtable walker to return a linked-list of all 
+     * the shadows it modified, then do breadth-first recursion 
+     * to find the way up to higher-level tables and unshadow them too. 
+     *
+     * The current code (just tearing down each page's shadows as we
+     * detect that it is not a pagetable) is correct, but very slow. 
+     * It means extra emulated writes and slows down removal of mappings. */
+}
+
+/**************************************************************************/
+
+void sh_update_paging_modes(struct vcpu *v)
+/* Re-derive this vcpu's shadow paging mode from its current guest state
+ * (PV vs HVM, paging enabled, long mode, PAE), installing a new mode
+ * structure and, if the shadow level count changed, a new monitor table.
+ * Called with the shadow lock held. */
+{
+    struct domain *d = v->domain;
+    struct shadow_paging_mode *old_mode = v->arch.shadow.mode;
+    mfn_t old_guest_table;
+
+    ASSERT(shadow_lock_is_acquired(d));
+
+    // Valid transitions handled by this function:
+    // - For PV guests:
+    //     - after a shadow mode has been changed
+    // - For HVM guests:
+    //     - after a shadow mode has been changed
+    //     - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
+    //
+
+    // Avoid determining the current shadow mode for uninitialized CPUs, as
+    // we can not yet determine whether it is an HVM or PV domain.
+    //
+    if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        printk("%s: postponing determination of shadow mode\n", __func__);
+        return;
+    }
+
+    // First, tear down any old shadow tables held by this vcpu.
+    //
+    shadow_detach_old_tables(v);
+
+    if ( !hvm_guest(v) )
+    {
+        ///
+        /// PV guest
+        ///
+#if CONFIG_PAGING_LEVELS == 4
+        if ( pv_32bit_guest(v) )
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3);
+        else
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
+#elif CONFIG_PAGING_LEVELS == 3
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#elif CONFIG_PAGING_LEVELS == 2
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
+#else
+#error unexpected paging mode
+#endif
+    }
+    else
+    {
+        ///
+        /// HVM guest
+        ///
+        ASSERT(shadow_mode_translate(d));
+        ASSERT(shadow_mode_external(d));
+
+        v->arch.shadow.hvm_paging_enabled = !!hvm_paging_enabled(v);
+        if ( !v->arch.shadow.hvm_paging_enabled )
+        {
+            
+            /* Set v->arch.guest_table to use the p2m map, and choose
+             * the appropriate shadow mode */
+            old_guest_table = pagetable_get_mfn(v->arch.guest_table);
+#if CONFIG_PAGING_LEVELS == 2
+            v->arch.guest_table =
+                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
+#elif CONFIG_PAGING_LEVELS == 3 
+            v->arch.guest_table =
+                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#else /* CONFIG_PAGING_LEVELS == 4 */
+            { 
+                l4_pgentry_t *l4e; 
+                /* Use the start of the first l3 table as a PAE l3 */
+                ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+                l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+                ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+                v->arch.guest_table =
+                    pagetable_from_pfn(l4e_get_pfn(l4e[0]));
+                sh_unmap_domain_page(l4e);
+            }
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#endif
+            /* Fix up refcounts on guest_table */
+            get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
+            if ( mfn_x(old_guest_table) != 0 )
+                put_page(mfn_to_page(old_guest_table));
+        }
+        else
+        {
+#ifdef __x86_64__
+            if ( hvm_long_mode_enabled(v) )
+            {
+                // long mode guest...
+                v->arch.shadow.mode =
+                    &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4);
+            }
+            else
+#endif
+                if ( hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PAE )
+                {
+#if CONFIG_PAGING_LEVELS >= 3
+                    // 32-bit PAE mode guest...
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3);
+#else
+                    SHADOW_ERROR("PAE not supported in 32-bit Xen\n");
+                    domain_crash(d);
+                    return;
+#endif
+                }
+                else
+                {
+                    // 32-bit 2 level guest...
+#if CONFIG_PAGING_LEVELS >= 3
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
+#else
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
+#endif
+                }
+        }
+
+        if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
+        {
+            mfn_t mmfn = shadow_make_monitor_table(v);
+            v->arch.monitor_table = pagetable_from_mfn(mmfn);
+            v->arch.monitor_vtable = sh_map_domain_page(mmfn);
+        } 
+
+        if ( v->arch.shadow.mode != old_mode )
+        {
+            SHADOW_PRINTK("new paging mode: d=%u v=%u g=%u s=%u "
+                          "(was g=%u s=%u)\n",
+                          d->domain_id, v->vcpu_id, 
+                          v->arch.shadow.mode->guest_levels,
+                          v->arch.shadow.mode->shadow_levels,
+                          old_mode ? old_mode->guest_levels : 0,
+                          old_mode ? old_mode->shadow_levels : 0);
+            if ( old_mode &&
+                 (v->arch.shadow.mode->shadow_levels !=
+                  old_mode->shadow_levels) )
+            {
+                /* Need to make a new monitor table for the new mode */
+                mfn_t new_mfn, old_mfn;
+
+                /* Only the vcpu itself may safely rebuild its own monitor
+                 * table, since it is about to switch CR3 onto it. */
+                if ( v != current ) 
+                {
+                    SHADOW_ERROR("Some third party (d=%u v=%u) is changing "
+                                  "this HVM vcpu's (d=%u v=%u) paging mode!\n",
+                                  current->domain->domain_id, current->vcpu_id,
+                                  v->domain->domain_id, v->vcpu_id);
+                    domain_crash(v->domain);
+                    return;
+                }
+
+                sh_unmap_domain_page(v->arch.monitor_vtable);
+                old_mfn = pagetable_get_mfn(v->arch.monitor_table);
+                v->arch.monitor_table = pagetable_null();
+                new_mfn = v->arch.shadow.mode->make_monitor_table(v);            
+                v->arch.monitor_table = pagetable_from_mfn(new_mfn);
+                v->arch.monitor_vtable = sh_map_domain_page(new_mfn);
+                SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n",
+                               mfn_x(new_mfn));
+
+                /* Don't be running on the old monitor table when we 
+                 * pull it down!  Switch CR3, and warn the HVM code that
+                 * its host cr3 has changed. */
+                make_cr3(v, mfn_x(new_mfn));
+                write_ptbase(v);
+                hvm_update_host_cr3(v);
+                old_mode->destroy_monitor_table(v, old_mfn);
+            }
+        }
+
+        // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
+        //        These are HARD: think about the case where two CPU's have
+        //        different values for CR4.PSE and CR4.PGE at the same time.
+        //        This *does* happen, at least for CR4.PGE...
+    }
+
+    v->arch.shadow.mode->update_cr3(v);
+}
+
+/**************************************************************************/
+/* Turning on and off shadow features */
+
+static void sh_new_mode(struct domain *d, u32 new_mode)
+/* Inform all the vcpus that the shadow mode has been changed.
+ * Caller holds the shadow lock; the domain must not be the caller's own
+ * (its vcpus are paused while their paging modes are switched). */
+{
+    struct vcpu *v;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d != current->domain);
+    d->arch.shadow.mode = new_mode;
+    if ( new_mode & SHM2_translate ) 
+        shadow_audit_p2m(d);
+    for_each_vcpu(d, v)
+        sh_update_paging_modes(v);
+}
+
+static int shadow_enable(struct domain *d, u32 mode)
+/* Turn on "permanent" shadow features: external, translate, refcount.
+ * Can only be called once on a domain, and these features cannot be
+ * disabled.
+ * Returns 0 for success, -errno for failure. */
+{
+    unsigned int old_pages;
+    int rv = 0;
+
+    mode |= SHM2_enable;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    /* Sanity check the arguments */
+    if ( (d == current->domain) ||
+         shadow_mode_enabled(d) ||
+         ((mode & SHM2_external) && !(mode & SHM2_translate)) )
+    {
+        rv = -EINVAL;
+        goto out;
+    }
+
+    // XXX -- eventually would like to require that all memory be allocated
+    // *after* shadow_enabled() is called...  So here, we would test to make
+    // sure that d->page_list is empty.
+#if 0
+    spin_lock(&d->page_alloc_lock);
+    if ( !list_empty(&d->page_list) )
+    {
+        spin_unlock(&d->page_alloc_lock);
+        rv = -EINVAL;
+        goto out;
+    }
+    spin_unlock(&d->page_alloc_lock);
+#endif
+
+    /* Init the shadow memory allocation if the user hasn't done so */
+    old_pages = d->arch.shadow.total_pages;
+    if ( old_pages == 0 )
+        if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
+        {
+            set_sh_allocation(d, 0, NULL);
+            rv = -ENOMEM;
+            goto out;
+        }
+
+    /* Init the hash table */
+    if ( shadow_hash_alloc(d) != 0 )
+    {
+        set_sh_allocation(d, old_pages, NULL);
+        rv = -ENOMEM;
+        goto out;
+    }
+
+    /* Init the P2M table */
+    if ( mode & SHM2_translate )
+        if ( !shadow_alloc_p2m_table(d) )
+        {
+            shadow_hash_teardown(d);
+            set_sh_allocation(d, old_pages, NULL);
+            shadow_p2m_teardown(d);
+            rv = -ENOMEM;
+            goto out;
+        }
+
+    /* Update the bits */
+    sh_new_mode(d, mode);
+    shadow_audit_p2m(d);
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    /* Fix: propagate the collected status.  This previously returned an
+     * unconditional 0, so callers never saw -EINVAL/-ENOMEM failures. */
+    return rv;
+}
+
+void shadow_teardown(struct domain *d)
+/* Destroy the shadow pagetables of this domain and free its shadow memory.
+ * Should only be called for dying domains. */
+{
+    struct vcpu *v;
+    mfn_t mfn;
+
+    ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+    ASSERT(d != current->domain);
+
+    /* May be entered with the lock already held by the caller; take it
+     * only if we don't have it yet. */
+    if ( !shadow_lock_is_acquired(d) )
+        shadow_lock(d); /* Keep various asserts happy */
+
+    if ( shadow_mode_enabled(d) )
+    {
+        /* Release the shadow and monitor tables held by each vcpu */
+        for_each_vcpu(d, v)
+        {
+            shadow_detach_old_tables(v);
+            if ( shadow_mode_external(d) )
+            {
+                /* External-mode vcpus own a monitor table: destroy it
+                 * before clearing the pointer. */
+                mfn = pagetable_get_mfn(v->arch.monitor_table);
+                if ( valid_mfn(mfn) && (mfn_x(mfn) != 0) )
+                    shadow_destroy_monitor_table(v, mfn);
+                v->arch.monitor_table = pagetable_null();
+            }
+        }
+    }
+
+    if ( d->arch.shadow.total_pages != 0 )
+    {
+        SHADOW_PRINTK("teardown of domain %u starts."
+                       " Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages,
+                       d->arch.shadow.free_pages,
+                       d->arch.shadow.p2m_pages);
+        /* Destroy all the shadows and release memory to domheap */
+        set_sh_allocation(d, 0, NULL);
+        /* Release the hash table back to xenheap */
+        if (d->arch.shadow.hash_table)
+            shadow_hash_teardown(d);
+        /* Release the log-dirty bitmap of dirtied pages */
+        sh_free_log_dirty_bitmap(d);
+        /* Should not have any more memory held */
+        SHADOW_PRINTK("teardown done."
+                       " Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->arch.shadow.total_pages,
+                       d->arch.shadow.free_pages,
+                       d->arch.shadow.p2m_pages);
+        ASSERT(d->arch.shadow.total_pages == 0);
+    }
+
+    /* We leave the "permanent" shadow modes enabled, but clear the
+     * log-dirty mode bit.  We don't want any more mark_dirty()
+     * calls now that we've torn down the bitmap */
+    d->arch.shadow.mode &= ~SHM2_log_dirty;
+
+    shadow_unlock(d);
+}
+
+/* Final stage of domain destruction, called from arch_domain_destroy()
+ * once it is safe to pull down the p2m map.  Also mops up any shadow
+ * allocation left behind by a domain that never went through
+ * domain_kill(). */
+void shadow_final_teardown(struct domain *d)
+{
+    SHADOW_PRINTK("dom %u final teardown starts."
+                   " Shadow pages total = %u, free = %u, p2m=%u\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages,
+                   d->arch.shadow.free_pages,
+                   d->arch.shadow.p2m_pages);
+
+    /* If any shadow memory survives, run the normal teardown first. */
+    if ( d->arch.shadow.total_pages != 0 )
+    {
+        shadow_teardown(d);
+    }
+
+    /* With the shadows gone, the p2m map can safely follow. */
+    if ( d->arch.shadow.p2m_pages != 0 )
+    {
+        shadow_p2m_teardown(d);
+    }
+
+    SHADOW_PRINTK("dom %u final teardown done."
+                   " Shadow pages total = %u, free = %u, p2m=%u\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages,
+                   d->arch.shadow.free_pages,
+                   d->arch.shadow.p2m_pages);
+}
+
+/* Turn on a single shadow mode feature bit.  Caller holds the shadow
+ * lock.  Returns 0 on success, -EINVAL for a bad call, -ENOMEM if the
+ * initial shadow allocation cannot be made. */
+static int shadow_one_bit_enable(struct domain *d, u32 mode)
+{
+    ASSERT(shadow_lock_is_acquired(d));
+
+    /* Refuse to operate on ourselves, or to set a bit twice. */
+    if ( d == current->domain )
+        return -EINVAL;
+    if ( d->arch.shadow.mode & mode )
+        return -EINVAL;
+
+    if ( d->arch.shadow.mode == 0 )
+    {
+        /* First feature bit: set up the shadow pool and hash table. */
+        if ( set_sh_allocation(d, 1, NULL) != 0
+             || shadow_hash_alloc(d) != 0 )
+        {
+            set_sh_allocation(d, 0, NULL);
+            return -ENOMEM;
+        }
+    }
+
+    /* Publish the new mode to all vcpus. */
+    sh_new_mode(d, d->arch.shadow.mode | mode);
+
+    return 0;
+}
+
+static int shadow_one_bit_disable(struct domain *d, u32 mode)
+/* Turn off a single shadow mode feature.  If this clears the last
+ * feature bit, also take the domain off shadows entirely and release
+ * all shadow memory.  Caller holds the shadow lock. */
+{
+    struct vcpu *v;
+    ASSERT(shadow_lock_is_acquired(d));
+
+    /* Sanity check the call: not on ourselves, and the bit must be set */
+    if ( d == current->domain || !(d->arch.shadow.mode & mode) )
+    {
+        return -EINVAL;
+    }
+
+    /* Update the bits */
+    sh_new_mode(d, d->arch.shadow.mode & ~mode);
+    if ( d->arch.shadow.mode == 0 )
+    {
+        /* Get this domain off shadows */
+        SHADOW_PRINTK("un-shadowing of domain %u starts."
+                       " Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages,
+                       d->arch.shadow.free_pages,
+                       d->arch.shadow.p2m_pages);
+        for_each_vcpu(d, v)
+        {
+            shadow_detach_old_tables(v);
+            /* Point cr3 back at the guest's own pagetable (the
+             * user-mode table for 64-bit vcpus in user context). */
+#if CONFIG_PAGING_LEVELS == 4
+            if ( !(v->arch.flags & TF_kernel_mode) )
+                make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
+            else
+#endif
+                make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
+
+        }
+
+        /* Pull down the memory allocation */
+        if ( set_sh_allocation(d, 0, NULL) != 0 )
+        {
+            // XXX - How can this occur?
+            //       Seems like a bug to return an error now that we've
+            //       disabled the relevant shadow mode.
+            //
+            return -ENOMEM;
+        }
+        shadow_hash_teardown(d);
+        SHADOW_PRINTK("un-shadowing of domain %u done."
+                       " Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages,
+                       d->arch.shadow.free_pages,
+                       d->arch.shadow.p2m_pages);
+    }
+
+    return 0;
+}
+
+/* Enable/disable ops for the "test" and "log-dirty" modes */
+/* Enable "test" shadow mode (plain shadowing, no extra features).
+ * Returns 0 on success, -EINVAL if the domain is already shadowed. */
+int shadow_test_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    if ( shadow_mode_enabled(d) )
+    {
+        /* Fix: the adjacent string literals used to concatenate to
+         * "...test modeon already..."; add the missing space. */
+        SHADOW_ERROR("Don't support enabling test mode"
+                     " on already shadowed doms\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = shadow_one_bit_enable(d, SHM2_enable);
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+/* Disable "test" shadow mode; reverse of shadow_test_enable(). */
+int shadow_test_disable(struct domain *d)
+{
+    int rc;
+
+    domain_pause(d);
+    shadow_lock(d);
+    rc = shadow_one_bit_disable(d, SHM2_enable);
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return rc;
+}
+
+/* Allocate and zero the log-dirty bitmap, sized to cover the guest's
+ * pseudo-physical space rounded up to a whole number of longs.
+ * Returns 0 on success, -ENOMEM on failure (in which case the recorded
+ * bitmap size is reset to zero). */
+static int
+sh_alloc_log_dirty_bitmap(struct domain *d)
+{
+    ASSERT(d->arch.shadow.dirty_bitmap == NULL);
+
+    /* Round the bit count up to a multiple of BITS_PER_LONG. */
+    d->arch.shadow.dirty_bitmap_size =
+        (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) &
+        ~(BITS_PER_LONG - 1);
+    d->arch.shadow.dirty_bitmap =
+        xmalloc_array(unsigned long,
+                      d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG);
+    if ( d->arch.shadow.dirty_bitmap != NULL )
+    {
+        /* Start with every page clean. */
+        memset(d->arch.shadow.dirty_bitmap, 0,
+               d->arch.shadow.dirty_bitmap_size / 8);
+        return 0;
+    }
+
+    d->arch.shadow.dirty_bitmap_size = 0;
+    return -ENOMEM;
+}
+
+/* Release the log-dirty bitmap (if any) and record a zero size. */
+static void
+sh_free_log_dirty_bitmap(struct domain *d)
+{
+    d->arch.shadow.dirty_bitmap_size = 0;
+    if ( d->arch.shadow.dirty_bitmap == NULL )
+        return;
+    xfree(d->arch.shadow.dirty_bitmap);
+    d->arch.shadow.dirty_bitmap = NULL;
+}
+
+/* Enter log-dirty mode: allocate the dirty bitmap and set the mode bit.
+ * Returns 0 on success; -EINVAL if already log-dirty or already
+ * shadowed; -ENOMEM if the bitmap cannot be allocated. */
+static int shadow_log_dirty_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    if ( shadow_mode_log_dirty(d) )
+    {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if ( shadow_mode_enabled(d) )
+    {
+        /* Fix: the adjacent string literals used to concatenate to
+         * "...log-dirtyon already..."; add the missing space. */
+        SHADOW_ERROR("Don't (yet) support enabling log-dirty"
+                     " on already shadowed doms\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = sh_alloc_log_dirty_bitmap(d);
+    if ( ret != 0 )
+    {
+        sh_free_log_dirty_bitmap(d);
+        goto out;
+    }
+
+    ret = shadow_one_bit_enable(d, SHM2_log_dirty);
+    if ( ret != 0 )
+        sh_free_log_dirty_bitmap(d);
+
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return ret;
+}
+
+/* Leave log-dirty mode, freeing the bitmap once the bit has cleared. */
+static int shadow_log_dirty_disable(struct domain *d)
+{
+    int rc;
+
+    domain_pause(d);
+    shadow_lock(d);
+    rc = shadow_one_bit_disable(d, SHM2_log_dirty);
+    if ( !shadow_mode_log_dirty(d) )
+        sh_free_log_dirty_bitmap(d);
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return rc;
+}
+
+/**************************************************************************/
+/* P2M map manipulations */
+
+static void
+sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
+/* Drop the p2m entry for gfn, tearing down any shadows of the frame and
+ * any remaining mappings of it first.  No-op for non-translated domains. */
+{
+    struct vcpu *v;
+
+    if ( !shadow_mode_translate(d) )
+        return;
+
+    /* Shadow operations want a vcpu of the target domain; fall back to
+     * vcpu 0 when the caller runs in some other domain. */
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+
+
+    SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn);
+    //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn);
+
+    /* Destroy shadows of the frame, flush any surviving mappings, then
+     * invalidate both the p2m and m2p entries. */
+    shadow_remove_all_shadows_and_parents(v, _mfn(mfn));
+    if ( shadow_remove_all_mappings(v, _mfn(mfn)) )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+    shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+    set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+}
+
+void
+shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                 unsigned long mfn)
+/* Public wrapper: remove a gfn->mfn translation under the shadow lock,
+ * auditing the p2m before and after. */
+{
+    shadow_lock(d);
+    shadow_audit_p2m(d);
+    sh_p2m_remove_page(d, gfn, mfn);
+    shadow_audit_p2m(d);
+    shadow_unlock(d);
+}
+
+void
+shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                              unsigned long mfn)
+/* Install a gfn->mfn translation in a translated domain's p2m, first
+ * tearing down any previous mapping of either the gfn slot or the
+ * machine frame.  No-op for non-translated domains. */
+{
+    struct vcpu *v;
+    unsigned long ogfn;
+    mfn_t omfn;
+
+    if ( !shadow_mode_translate(d) )
+        return;
+
+    /* Shadow operations want a vcpu of the target domain; fall back to
+     * vcpu 0 when the caller runs in some other domain. */
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+
+    shadow_lock(d);
+    shadow_audit_p2m(d);
+
+    SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    omfn = sh_gfn_to_mfn(d, gfn);
+    if ( valid_mfn(omfn) )
+    {
+        /* Get rid of the old mapping, especially any shadows */
+        shadow_remove_all_shadows_and_parents(v, omfn);
+        if ( shadow_remove_all_mappings(v, omfn) )
+            flush_tlb_mask(d->domain_dirty_cpumask);
+        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+    }
+
+    ogfn = sh_mfn_to_gfn(d, _mfn(mfn));
+    /* NOTE(review): 0x55..55 appears to be a debug poison value for
+     * uninitialised m2p entries -- confirm against the m2p setup code. */
+    if (
+#ifdef __x86_64__
+        (ogfn != 0x5555555555555555L)
+#else
+        (ogfn != 0x55555555L)
+#endif
+        && (ogfn != INVALID_M2P_ENTRY)
+        && (ogfn != gfn) )
+    {
+        /* This machine frame is already mapped at another physical address */
+        SHADOW_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
+                       mfn, ogfn, gfn);
+        if ( valid_mfn(omfn = sh_gfn_to_mfn(d, ogfn)) )
+        {
+            SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n",
+                           ogfn , mfn_x(omfn));
+            /* Only unmap the old slot if it still points at this frame. */
+            if ( mfn_x(omfn) == mfn )
+                sh_p2m_remove_page(d, ogfn, mfn);
+        }
+    }
+
+    shadow_set_p2m_entry(d, gfn, _mfn(mfn));
+    set_gpfn_from_mfn(mfn, gfn);
+    shadow_audit_p2m(d);
+    shadow_unlock(d);
+}
+
+/**************************************************************************/
+/* Log-dirty mode support */
+
+/* Convert a shadow to log-dirty mode. */
+void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn)
+{
+    /* Not implemented: callers are expected never to find a
+     * non-log-dirty shadow while the domain is in log-dirty mode, so
+     * reaching this function is a fatal logic error. */
+    BUG();
+}
+
+
+/* Read a domain's log-dirty bitmap and stats.
+ * If the operation is a CLEAN, clear the bitmap and stats as well.
+ * Returns 0 for success, -EINVAL for bad arguments or a failed copy
+ * out to the guest. */
+static int shadow_log_dirty_op(
+    struct domain *d, struct xen_domctl_shadow_op *sc)
+{
+    int i, rv = 0, clean = 0;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+    SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
+                  (clean) ? "clean" : "peek",
+                  d->domain_id,
+                  d->arch.shadow.fault_count,
+                  d->arch.shadow.dirty_count);
+
+    sc->stats.fault_count = d->arch.shadow.fault_count;
+    sc->stats.dirty_count = d->arch.shadow.dirty_count;
+
+    if ( clean )
+    {
+        struct list_head *l, *t;
+        struct page_info *pg;
+
+        /* Need to revoke write access to the domain's pages again.
+         * In future, we'll have a less heavy-handed approach to this,
+         * but for now, we just unshadow everything except Xen. */
+        list_for_each_safe(l, t, &d->arch.shadow.toplevel_shadows)
+        {
+            pg = list_entry(l, struct page_info, list);
+            shadow_unhook_mappings(d->vcpu[0], page_to_mfn(pg));
+        }
+
+        d->arch.shadow.fault_count = 0;
+        d->arch.shadow.dirty_count = 0;
+    }
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) ||
+         (d->arch.shadow.dirty_bitmap == NULL) )
+    {
+        rv = -EINVAL;
+        goto out;
+    }
+
+    if ( sc->pages > d->arch.shadow.dirty_bitmap_size )
+        sc->pages = d->arch.shadow.dirty_bitmap_size;
+
+#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += CHUNK )
+    {
+        int bytes = ((((sc->pages - i) > CHUNK)
+                      ? CHUNK
+                      : (sc->pages - i)) + 7) / 8;
+
+        if ( copy_to_guest_offset(
+                 sc->dirty_bitmap,
+                 i/(8*sizeof(unsigned long)),
+                 d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                 (bytes + sizeof(unsigned long) - 1) / sizeof(unsigned long)) )
+        {
+            rv = -EINVAL;
+            goto out;
+        }
+
+        if ( clean )
+            memset(d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                   0, bytes);
+    }
+#undef CHUNK
+
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    /* Fix: return the collected status.  This previously returned an
+     * unconditional 0, so the -EINVAL paths above were invisible to the
+     * caller. */
+    return rv;
+}
+
+
+/* Mark a page as dirty in the log-dirty bitmap.  Caller holds the
+ * shadow lock, and the domain is in log-dirty mode. */
+void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    unsigned long pfn;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(shadow_mode_log_dirty(d));
+
+    if ( !valid_mfn(gmfn) )
+        return;
+
+    ASSERT(d->arch.shadow.dirty_bitmap != NULL);
+
+    /* We /really/ mean PFN here, even for non-translated guests. */
+    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Nothing to do here...
+     */
+    if ( unlikely(!VALID_M2P(pfn)) )
+        return;
+
+    /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
+    if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) )
+    {
+        if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) )
+        {
+            /* First time this pfn was dirtied since the last clean. */
+            SHADOW_DEBUG(LOGDIRTY,
+                          "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n",
+                          mfn_x(gmfn), pfn, d->domain_id);
+            d->arch.shadow.dirty_count++;
+        }
+    }
+    else
+    {
+        /* pfn out of bitmap range: log the offending page's details. */
+        SHADOW_PRINTK("mark_dirty OOR! "
+                       "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                       "owner=%d c=%08x t=%" PRtype_info "\n",
+                       mfn_x(gmfn),
+                       pfn,
+                       d->arch.shadow.dirty_bitmap_size,
+                       d->domain_id,
+                       (page_get_owner(mfn_to_page(gmfn))
+                        ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                        : -1),
+                       mfn_to_page(gmfn)->count_info,
+                       mfn_to_page(gmfn)->u.inuse.type_info);
+    }
+}
+
+
+/**************************************************************************/
+/* Shadow-control XEN_DOMCTL dispatcher */
+
+int shadow_domctl(struct domain *d,
+                  xen_domctl_shadow_op_t *sc,
+                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+/* Dispatch a XEN_DOMCTL shadow op to the matching handler.
+ * Returns 0 or -errno; SET_ALLOCATION may instead return a hypercall
+ * continuation if it is preempted part-way through. */
+{
+    int rc, preempted = 0;
+
+    /* Shadow ops pause the target domain, so self-targeting would
+     * deadlock. */
+    if ( unlikely(d == current->domain) )
+    {
+        DPRINTK("Don't try to do a shadow op on yourself!\n");
+        return -EINVAL;
+    }
+
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+        /* Peel off log-dirty first, then the base enable bit. */
+        if ( shadow_mode_log_dirty(d) )
+            if ( (rc = shadow_log_dirty_disable(d)) != 0 )
+                return rc;
+        if ( d->arch.shadow.mode & SHM2_enable )
+            if ( (rc = shadow_test_disable(d)) != 0 )
+                return rc;
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+        return shadow_test_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+        return shadow_log_dirty_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+        return shadow_enable(d, SHM2_refcounts|SHM2_translate);
+
+    case XEN_DOMCTL_SHADOW_OP_CLEAN:
+    case XEN_DOMCTL_SHADOW_OP_PEEK:
+        return shadow_log_dirty_op(d, sc);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE:
+        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
+            return shadow_log_dirty_enable(d);
+        /* Caller-supplied mode bits share layout with SHM2_*, shifted. */
+        return shadow_enable(d, sc->mode << SHM2_shift);
+
+    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+        sc->mb = shadow_get_allocation(d);
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+        rc = shadow_set_allocation(d, sc->mb, &preempted);
+        if ( preempted )
+            /* Not finished.  Set up to re-run the call. */
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_domctl, "h", u_domctl);
+        else
+            /* Finished.  Return the new allocation */
+            sc->mb = shadow_get_allocation(d);
+        return rc;
+
+    default:
+        SHADOW_ERROR("Bad shadow op %u\n", sc->op);
+        return -EINVAL;
+    }
+}
+
+
+/**************************************************************************/
+/* Auditing shadow tables */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
+
+void shadow_audit_tables(struct vcpu *v)
+/* Audit the shadow pagetables: either every shadow in the system, or
+ * (when only partial auditing is configured) just the current paging
+ * mode's shadows. */
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,2,2),  /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,2,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,2,2),  /* l2_32   */
+#else
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,2),  /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,2),  /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,3),  /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
+        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4),  /* l2_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_l3_table,4,4),  /* l3_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_l4_table,4,4),  /* l4_64   */
+#endif /* CONFIG_PAGING_LEVELS >= 4 */
+#endif /* CONFIG_PAGING_LEVELS > 2 */
+        NULL  /* All the rest */
+    };
+    unsigned int mask;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+    if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
+        mask = ~1; /* Audit every table in the system */
+    else
+    {
+        /* Audit only the current mode's tables */
+        switch ( v->arch.shadow.mode->guest_levels )
+        {
+        case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
+        case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
+                        |SHF_L2H_PAE|SHF_L3_PAE); break;
+        case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64
+                        |SHF_L3_64|SHF_L4_64); break;
+        default: BUG();
+        }
+    }
+
+    /* Fix: pass the mask we just computed.  It was previously ignored
+     * in favour of a hard-coded ~1, which made the mode-restricted
+     * branch above dead code (and left 'mask' set-but-unused). */
+    hash_foreach(v, mask, callbacks, _mfn(INVALID_MFN));
+}
+
+#endif /* Shadow audit */
+
+
+/**************************************************************************/
+/* Auditing p2m tables */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_P2M
+
+void shadow_audit_p2m(struct domain *d)
+/* Cross-check a translated domain's p2m table against the m2p: first
+ * walk the page list checking each frame's m2p entry round-trips
+ * through the p2m, then walk the p2m checking each present entry
+ * round-trips through the m2p. */
+{
+    struct list_head *entry;
+    struct page_info *page;
+    struct domain *od;
+    unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
+    mfn_t p2mfn;
+    unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
+    int test_linear;
+
+    if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) )
+        return;
+
+    //SHADOW_PRINTK("p2m audit starts\n");
+
+    /* Only check the linear p2m map when it's ours and mapped. */
+    test_linear = ( (d == current->domain) && current->arch.monitor_vtable );
+    if ( test_linear )
+        local_flush_tlb();
+
+    /* Audit part one: walk the domain's page allocation list, checking
+     * the m2p entries. */
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        page = list_entry(entry, struct page_info, list);
+        mfn = mfn_x(page_to_mfn(page));
+
+        // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
+
+        od = page_get_owner(page);
+
+        if ( od != d )
+        {
+            SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
+                           mfn, od, (od?od->domain_id:-1), d, d->domain_id);
+            continue;
+        }
+
+        gfn = get_gpfn_from_mfn(mfn);
+        if ( gfn == INVALID_M2P_ENTRY )
+        {
+            orphans_i++;
+            //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
+            //               mfn);
+            continue;
+        }
+
+        if ( gfn == 0x55555555 )
+        {
+            orphans_d++;
+            //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
+            //               mfn);
+            continue;
+        }
+
+        p2mfn = sh_gfn_to_mfn_foreign(d, gfn);
+        if ( mfn_x(p2mfn) != mfn )
+        {
+            mpbad++;
+            SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
+                           " (-> gfn %#lx)\n",
+                           mfn, gfn, mfn_x(p2mfn),
+                           (mfn_valid(p2mfn)
+                            ? get_gpfn_from_mfn(mfn_x(p2mfn))
+                            : -1u));
+            /* This m2p entry is stale: the domain has another frame in
+             * this physical slot.  No great disaster, but for neatness,
+             * blow away the m2p entry. */
+            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+        }
+
+        if ( test_linear )
+        {
+            lp2mfn = get_mfn_from_gpfn(gfn);
+            if ( lp2mfn != mfn_x(p2mfn) )
+            {
+                /* Fix: unwrap p2mfn with mfn_x() -- passing an mfn_t
+                 * to a %#lx conversion is a format/argument mismatch
+                 * when mfn_t is a typedef'd struct. */
+                SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
+                               "(!= mfn %#lx)\n", gfn, lp2mfn,
+                               mfn_x(p2mfn));
+            }
+        }
+
+        // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
+        //                mfn, gfn, p2mfn, lp2mfn);
+    }
+
+    /* Audit part two: walk the domain's p2m table, checking the entries. */
+    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
+    {
+        l2_pgentry_t *l2e;
+        l1_pgentry_t *l1e;
+        int i1, i2;
+
+#if CONFIG_PAGING_LEVELS == 4
+        l4_pgentry_t *l4e;
+        l3_pgentry_t *l3e;
+        int i3, i4;
+        l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#elif CONFIG_PAGING_LEVELS == 3
+        l3_pgentry_t *l3e;
+        int i3;
+        l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#else /* CONFIG_PAGING_LEVELS == 2 */
+        l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#endif
+
+        gfn = 0;
+#if CONFIG_PAGING_LEVELS >= 3
+#if CONFIG_PAGING_LEVELS >= 4
+        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
+        {
+            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
+            {
+                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
+                continue;
+            }
+            l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4])));
+#endif /* now at levels 3 or 4... */
+            for ( i3 = 0;
+                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
+                  i3++ )
+            {
+                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
+                {
+                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+                    continue;
+                }
+                l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3])));
+#endif /* all levels... */
+                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
+                {
+                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
+                    {
+                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+                        continue;
+                    }
+                    l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2])));
+
+                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
+                    {
+                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                            continue;
+                        mfn = l1e_get_pfn(l1e[i1]);
+                        ASSERT(valid_mfn(_mfn(mfn)));
+                        m2pfn = get_gpfn_from_mfn(mfn);
+                        if ( m2pfn != gfn )
+                        {
+                            pmbad++;
+                            SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+                                           " -> gfn %#lx\n", gfn, mfn, m2pfn);
+                            BUG();
+                        }
+                    }
+                    sh_unmap_domain_page(l1e);
+                }
+#if CONFIG_PAGING_LEVELS >= 3
+                sh_unmap_domain_page(l2e);
+            }
+#if CONFIG_PAGING_LEVELS >= 4
+            sh_unmap_domain_page(l3e);
+        }
+#endif
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+        sh_unmap_domain_page(l4e);
+#elif CONFIG_PAGING_LEVELS == 3
+        sh_unmap_domain_page(l3e);
+#else /* CONFIG_PAGING_LEVELS == 2 */
+        sh_unmap_domain_page(l2e);
+#endif
+
+    }
+
+    //SHADOW_PRINTK("p2m audit complete\n");
+    //if ( orphans_i | orphans_d | mpbad | pmbad )
+    //    SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
+    //                   orphans_i + orphans_d, orphans_i, orphans_d,
+    if ( mpbad | pmbad )
+        SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
+                       pmbad, mpbad);
+}
+
+#endif /* p2m audit */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+/******************************************************************************
+ * arch/x86/mm/shadow/multi.c
+ *
+ * Simple, mostly-synchronous shadow page tables.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+// DESIGN QUESTIONS:
+// Why use subshadows for PAE guests?
+// - reduces pressure in the hash table
+// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
+// - would need to find space in the page_info to store 7 more bits of
+// backpointer
+// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
+// figure out when to demote the guest page from l3 status
+//
+// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
+// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
+// space for both PV and HVM guests.
+//
+
+#define SHADOW 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/shadow.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include "private.h"
+#include "types.h"
+
+/* The first cut: an absolutely synchronous, trap-and-emulate version,
+ * supporting only HVM guests (and so only "external" shadow mode).
+ *
+ * THINGS TO DO LATER:
+ *
+ * FIX GVA_TO_GPA
+ * The current interface returns an unsigned long, which is not big enough
+ * to hold a physical address in PAE. Should return a gfn instead.
+ *
+ * TEARDOWN HEURISTICS
+ * Also: have a heuristic for when to destroy a previous paging-mode's
+ * shadows. When a guest is done with its start-of-day 32-bit tables
+ * and reuses the memory we want to drop those shadows. Start with
+ * shadows in a page in two modes as a hint, but beware of clever tricks
+ * like reusing a pagetable for both PAE and 64-bit during boot...
+ *
+ * PAE LINEAR MAPS
+ * Rework shadow_get_l*e() to have the option of using map_domain_page()
+ * instead of linear maps. Add appropriate unmap_l*e calls in the users.
+ * Then we can test the speed difference made by linear maps. If the
+ * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
+ * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
+ * to share l2h pages again.
+ *
+ * PAE L3 COPYING
+ * In this code, we copy all 32 bytes of a PAE L3 every time we change an
+ * entry in it, and every time we change CR3. We copy it for the linear
+ * mappings (ugh! PAE linear mappings) and we copy it to the low-memory
+ * buffer so it fits in CR3. Maybe we can avoid some of this recopying
+ * by using the shadow directly in some places.
+ * Also, for SMP, need to actually respond to seeing shadow.pae_flip_pending.
+ *
+ * GUEST_WALK_TABLES TLB FLUSH COALESCE
+ * guest_walk_tables can do up to three remote TLB flushes as it walks to
+ * the first l1 of a new pagetable. Should coalesce the flushes to the end,
+ * and if we do flush, re-do the walk. If anything has changed, then
+ * pause all the other vcpus and do the walk *again*.
+ *
+ * WP DISABLED
+ * Consider how to implement having the WP bit of CR0 set to 0.
+ * Since we need to be able to cause write faults to pagetables, this might
+ * end up looking like not having the (guest) pagetables present at all in
+ * HVM guests...
+ *
+ * PSE disabled / PSE36
+ * We don't support any modes other than PSE enabled, PSE36 disabled.
+ * Neither of those would be hard to change, but we'd need to be able to
+ * deal with shadows made in one mode and used in another.
+ */
+
+/* Why is a guest entry being fetched into the shadows?  These are used
+ * as a bitmask: a demand write is DEMAND|WRITE. */
+#define FETCH_TYPE_PREFETCH 1
+#define FETCH_TYPE_DEMAND 2
+#define FETCH_TYPE_WRITE 4
+typedef enum {
+    ft_prefetch = FETCH_TYPE_PREFETCH,  /* speculative fill */
+    ft_demand_read = FETCH_TYPE_DEMAND, /* fault on read */
+    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE, /* fault on write */
+} fetch_type_t;
+
+#ifdef DEBUG_TRACE_DUMP
+/* Human-readable names for fetch_type_t values, for trace output.
+ * Fix: use C99 designated-initializer syntax ("[idx] = val"); the old
+ * GNU "[idx] val" form is obsolete and rejected by modern compilers. */
+static char *fetch_type_names[] = {
+    [ft_prefetch]     = "prefetch",
+    [ft_demand_read]  = "demand read",
+    [ft_demand_write] = "demand write",
+};
+#endif
+
+/* XXX forward declarations */
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
+#endif
+static inline void sh_update_linear_entries(struct vcpu *v);
+
+/**************************************************************************/
+/* Hash table mapping from guest pagetables to shadows
+ *
+ * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
+ * FL1's: maps the *gfn* of the start of a superpage to the mfn of a
+ * shadow L1 which maps its "splinters".
+ * PAE CR3s: maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
+ * PAE L3 info page for that CR3 value.
+ */
+
+/* Look up the FL1 shadow for a superpage gfn.  If the domain is in
+ * log-dirty mode, lazily convert any shadow found that has not yet
+ * been marked log-dirty. */
+static inline mfn_t
+get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
+{
+    mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn),
+                                     PGC_SH_fl1_shadow >> PGC_SH_type_shift);
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
+    {
+        struct page_info *sp = mfn_to_page(smfn);
+        if ( !(sp->count_info & PGC_SH_log_dirty) )
+            shadow_convert_to_log_dirty(v, smfn);
+    }
+
+    return smfn;
+}
+
+/* Look up the shadow of the given type for guest frame gmfn, with the
+ * same lazy log-dirty conversion as get_fl1_shadow_status(). */
+static inline mfn_t
+get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
+{
+    mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn),
+                                    shadow_type >> PGC_SH_type_shift);
+
+    perfc_incrc(shadow_get_shadow_status);
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
+    {
+        struct page_info *sp = mfn_to_page(smfn);
+        if ( !(sp->count_info & PGC_SH_log_dirty) )
+            shadow_convert_to_log_dirty(v, smfn);
+    }
+
+    return smfn;
+}
+
+/* Insert an FL1 shadow (keyed by guest superpage gfn) into the hash,
+ * tagging it with the log-dirty bit iff the domain is in log-dirty
+ * mode. */
+static inline void
+set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
+{
+    struct page_info *sp = mfn_to_page(smfn);
+
+    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
+                  gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain)) )
+    {
+        /* Mark this shadow as a log-dirty shadow. */
+        set_bit(_PGC_SH_log_dirty, &sp->count_info);
+    }
+    else
+    {
+        clear_bit(_PGC_SH_log_dirty, &sp->count_info);
+    }
+
+    shadow_hash_insert(v, gfn_x(gfn),
+                       PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
+}
+
+static inline void
+set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
+/* Put a shadow into the hash table */
+{
+    struct domain *d = v->domain;
+    int res;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
+                  d->domain_id, v->vcpu_id, mfn_x(gmfn),
+                  shadow_type, mfn_x(smfn));
+
+    if ( unlikely(shadow_mode_log_dirty(d)) )
+        // mark this shadow as a log dirty shadow...
+        set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+    else
+        clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+
+    /* Hold a reference on the guest page while it has a shadow; the
+     * matching put_page() is in delete_shadow_status(). */
+    res = get_page(mfn_to_page(gmfn), d);
+    ASSERT(res == 1);
+
+    shadow_hash_insert(v, mfn_x(gmfn), shadow_type >> PGC_SH_type_shift,
+                       smfn);
+}
+
+/* Drop the hash entry of an FL1 shadow (keyed by superpage gfn). */
+static inline void
+delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
+{
+    u32 t = PGC_SH_fl1_shadow >> PGC_SH_type_shift;
+
+    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
+                  gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
+
+    shadow_hash_delete(v, gfn_x(gfn), t, smfn);
+}
+
+/* Drop a shadow's hash entry and release the reference that
+ * set_shadow_status() took on its guest page. */
+static inline void
+delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
+{
+    u32 t = shadow_type >> PGC_SH_type_shift;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
+                  v->domain->domain_id, v->vcpu_id,
+                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
+
+    shadow_hash_delete(v, mfn_x(gmfn), t, smfn);
+    put_page(mfn_to_page(gmfn));
+}
+
+/**************************************************************************/
+/* CPU feature support querying */
+
+static inline int
+guest_supports_superpages(struct vcpu *v)
+/* Returns non-zero iff _PAGE_PSE must be honoured for this guest:
+ * only HVM guests, and in 2-level mode only when CR4.PSE is set
+ * (PAE/long mode always support superpages). */
+{
+    /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
+     * CR4.PSE is set or the guest is in PAE or long mode */
+    return (hvm_guest(v) && (GUEST_PAGING_LEVELS != 2
+                             || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE)));
+}
+
+static inline int
+guest_supports_nx(struct vcpu *v)
+/* Returns non-zero iff this guest may set the NX bit in its pagetables.
+ * PV guests follow the host CPU; HVM guests are unconditionally allowed
+ * for now (see the XXX below — should check the guest's EFER.NXE). */
+{
+    if ( !hvm_guest(v) )
+        return cpu_has_nx;
+
+    // XXX - fix this!
+    return 1;
+}
+
+
+/**************************************************************************/
+/* Functions for walking the guest page tables */
+
+
+/* Walk the guest pagetables, filling the walk_t with what we see.
+ * Takes an uninitialised walk_t. The caller must call unmap_walk()
+ * on the walk_t before discarding it or calling guest_walk_tables again.
+ * If "guest_op" is non-zero, we are serving a genuine guest memory access,
+ * and must (a) be under the shadow lock, and (b) remove write access
+ * from any guest PT pages we see, as we will be using their contents to
+ * perform shadow updates.
+ * Returns 0 for success or non-zero if the guest pagetables are malformed.
+ * N.B. Finding a not-present entry does not cause a non-zero return code. */
+static inline int
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
+/* See the comment block above for the contract: fills *gw, which the
+ * caller must dispose of with unmap_walk(); returns 0 on success,
+ * non-zero for malformed guest tables (a gfn with no backing mfn).
+ * A not-present entry at any level returns 0 with the lower levels of
+ * *gw left zeroed by the memset below. */
+{
+    ASSERT(!guest_op || shadow_lock_is_acquired(v->domain));
+
+    perfc_incrc(shadow_guest_walk);
+    memset(gw, 0, sizeof(*gw));
+    gw->va = va;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    /* Get l4e from the top level table */
+    gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va);
+    /* Walk down to the l3e */
+    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
+    gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
+    if ( !valid_mfn(gw->l3mfn) ) return 1;
+    /* This mfn is a pagetable: make sure the guest can't write to it. */
+    if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
+        + guest_l3_table_offset(va);
+#else /* PAE only... */
+    /* Get l3e from the top level table */
+    gw->l3mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va);
+#endif /* PAE or 64... */
+    /* Walk down to the l2e */
+    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
+    gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
+    if ( !valid_mfn(gw->l2mfn) ) return 1;
+    /* This mfn is a pagetable: make sure the guest can't write to it. */
+    if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
+        + guest_l2_table_offset(va);
+#else /* 32-bit only... */
+    /* Get l2e from the top level table */
+    gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va);
+#endif /* All levels... */
+
+    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
+    if ( guest_supports_superpages(v) &&
+         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) )
+    {
+        /* Special case: this guest VA is in a PSE superpage, so there's
+         * no guest l1e.  We make one up so that the propagation code
+         * can generate a shadow l1 table.  Start with the gfn of the
+         * first 4k-page of the superpage. */
+        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
+        /* Grant full access in the l1e, since all the guest entry's
+         * access controls are enforced in the shadow l2e.  This lets
+         * us reflect l2 changes later without touching the l1s. */
+        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                     _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
+         * of the level 1 */
+        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
+            flags |= _PAGE_PAT;
+        /* Increment the pfn by the right number of 4k pages.
+         * The ~0x1 is to mask out the PAT bit mentioned above. */
+        start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
+        /* l1e == NULL + valid eff_l1e is the walk_t signature of a PSE
+         * mapping: consumers must use eff_l1e, not *l1e. */
+        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
+        gw->l1e = NULL;
+        gw->l1mfn = _mfn(INVALID_MFN);
+    }
+    else
+    {
+        /* Not a superpage: carry on and find the l1e. */
+        gw->l1mfn = vcpu_gfn_to_mfn(v, guest_l2e_get_gfn(*gw->l2e));
+        if ( !valid_mfn(gw->l1mfn) ) return 1;
+        /* This mfn is a pagetable: make sure the guest can't write to it. */
+        if ( guest_op
+             && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
+            flush_tlb_mask(v->domain->domain_dirty_cpumask);
+        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
+            + guest_l1_table_offset(va);
+        gw->eff_l1e = *gw->l1e;
+    }
+
+    return 0;
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding frame number. */
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding frame number.  Returns INVALID_GFN if the effective l1e
+ * is not present. */
+static inline gfn_t
+guest_walk_to_gfn(walk_t *gw)
+{
+    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+        return _gfn(INVALID_GFN);
+    return guest_l1e_get_gfn(gw->eff_l1e);
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding physical address. */
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding physical address.  Returns 0 if the effective l1e is
+ * not present (callers cannot distinguish this from a real gpa of 0). */
+static inline paddr_t
+guest_walk_to_gpa(walk_t *gw)
+{
+    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+        return 0;
+    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
+}
+
+
+/* Unmap (and reinitialise) a guest walk.
+ * Call this to dispose of any walk filled in by guest_walk_tables() */
+/* Unmap (and reinitialise) a guest walk.
+ * Call this to dispose of any walk filled in by guest_walk_tables().
+ * Only the levels that guest_walk_tables() mapped with
+ * sh_map_domain_page() are unmapped; top-level pointers came from
+ * guest_vtable and need no unmap. */
+static void unmap_walk(struct vcpu *v, walk_t *gw)
+{
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
+#endif
+    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
+#endif
+    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
+#ifdef DEBUG
+    /* Poison the walk in debug builds so stale use is caught early. */
+    memset(gw, 0, sizeof(*gw));
+#endif
+}
+
+
+/* Pretty-print the contents of a guest-walk */
+/* Pretty-print the contents of a guest-walk for debugging.
+ * Dereferences each level's entry pointer only if it is mapped. */
+static inline void print_gw(walk_t *gw)
+{
+    SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    SHADOW_PRINTK("   l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn));
+    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
+    if ( gw->l4e )
+        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
+#endif /* PAE or 64... */
+    SHADOW_PRINTK("   l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn));
+    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
+    if ( gw->l3e )
+        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
+#endif /* All levels... */
+    SHADOW_PRINTK("   l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn));
+    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
+    if ( gw->l2e )
+        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
+    SHADOW_PRINTK("   l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn));
+    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
+    if ( gw->l1e )
+        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
+    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
+}
+
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+/* Lightweight audit: pass all the shadows associated with this guest walk
+ * through the audit mechanisms */
+/* Lightweight audit: pass all the shadows associated with this guest walk
+ * through the audit mechanisms.  For each level's gmfn, look up the
+ * corresponding shadow (if any) and audit it; a PSE l2e with no l1
+ * shadow is audited via its FL1 shadow instead. */
+static void sh_audit_gw(struct vcpu *v, walk_t *gw)
+{
+    mfn_t smfn;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    if ( valid_mfn(gw->l4mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn,
+                                                PGC_SH_l4_shadow))) )
+        (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
+#endif /* PAE or 64... */
+    if ( valid_mfn(gw->l3mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn,
+                                                PGC_SH_l3_shadow))) )
+        (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
+#endif /* All levels... */
+    if ( valid_mfn(gw->l2mfn) )
+    {
+        if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
+                                                 PGC_SH_l2_shadow))) )
+            (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
+#if GUEST_PAGING_LEVELS == 3
+        /* PAE guests may also have a high-slot l2 shadow for this l2. */
+        if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
+                                                 PGC_SH_l2h_shadow))) )
+            (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
+#endif
+    }
+    if ( valid_mfn(gw->l1mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l1mfn,
+                                                PGC_SH_l1_shadow))) )
+        (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
+    else if ( gw->l2e
+              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
+              && valid_mfn(
+              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
+        (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
+}
+
+#else
+#define sh_audit_gw(_v, _gw) do {} while(0)
+#endif /* audit code */
+
+
+
+/**************************************************************************/
+/* Function to write to the guest tables, for propagating accessed and
+ * dirty bits from the shadow to the guest.
+ * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
+ * and an operation type. The guest entry is always passed as an l1e:
+ * since we only ever write flags, that's OK.
+ * Returns the new flag bits of the guest entry. */
+
+/* Set accessed (and, for demand writes, dirty) bits in a guest pagetable
+ * entry, then propagate the changed entry into any other shadows of the
+ * same guest page.  @ep is always typed as an l1e because only flag bits
+ * are written (see the comment block above).  Returns the new guest
+ * flags.  Must be called under the shadow lock. */
+static u32 guest_set_ad_bits(struct vcpu *v,
+                             mfn_t gmfn,
+                             guest_l1e_t *ep,
+                             unsigned int level,
+                             fetch_type_t ft)
+{
+    u32 flags, shflags, bit;
+    struct page_info *pg;
+    int res = 0;
+
+    /* gmfn must be a pagetable, or a not-yet-promoted page with no
+     * outstanding type refs (see the TJD comment further down). */
+    ASSERT(valid_mfn(gmfn)
+           && (sh_mfn_is_a_page_table(gmfn)
+               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
+                   == 0)));
+    /* ep must be naturally aligned: safe_write semantics depend on it. */
+    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
+    ASSERT(level <= GUEST_PAGING_LEVELS);
+    ASSERT(ft == ft_demand_read || ft == ft_demand_write);
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    flags = guest_l1e_get_flags(*ep);
+
+    /* PAE l3s do not have A and D bits */
+    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
+        return flags;
+
+    /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */
+    if ( ft == ft_demand_write
+         && (level == 1 ||
+             (level == 2 && GUEST_PAGING_LEVELS < 4
+              && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
+    {
+        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
+             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
+            return flags;  /* Guest already has A and D bits set */
+        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
+        perfc_incrc(shadow_ad_update);
+    }
+    else
+    {
+        if ( flags & _PAGE_ACCESSED )
+            return flags;  /* Guest already has A bit set */
+        flags |= _PAGE_ACCESSED;
+        perfc_incrc(shadow_a_update);
+    }
+
+    /* Set the bit(s) */
+    /* Writing the guest PT itself dirties its frame in log-dirty mode. */
+    sh_mark_dirty(v->domain, gmfn);
+    SHADOW_DEBUG(A_AND_D, "gfn = %"SH_PRI_gfn", "
+                 "old flags = %#x, new flags = %#x\n",
+                 guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags);
+    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
+
+    /* May need to propagate this change forward to other kinds of shadow */
+    pg = mfn_to_page(gmfn);
+    if ( !sh_mfn_is_a_page_table(gmfn) )
+    {
+        /* This guest pagetable is not yet shadowed at all. */
+        // MAF: I think this assert is busted...  If this gmfn has not yet
+        // been promoted, then it seems perfectly reasonable for there to be
+        // outstanding type refs to it...
+        /* TJD: No. If the gmfn has not been promoted, we must at least
+         * have recognised that it is a pagetable, and pulled write access.
+         * The type count should only be non-zero if it is actually a page
+         * table.  The test above was incorrect, though, so I've fixed it. */
+        ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0);
+        return flags;
+    }
+
+    /* Revalidate the written entry against every shadow type this page
+     * has, except the one we are currently fetching through (the caller
+     * handles that one). */
+    shflags = pg->shadow_flags & SHF_page_type_mask;
+    while ( shflags )
+    {
+        bit = find_first_set_bit(shflags);
+        ASSERT(shflags & (1u << bit));
+        shflags &= ~(1u << bit);
+        if ( !(pg->shadow_flags & (1u << bit)) )
+            continue;
+        switch ( bit )
+        {
+        case PGC_SH_type_to_index(PGC_SH_l1_shadow):
+            if (level != 1)
+                res |= sh_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep));
+            break;
+        case PGC_SH_type_to_index(PGC_SH_l2_shadow):
+            if (level != 2)
+                res |= sh_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep));
+            break;
+#if GUEST_PAGING_LEVELS == 3 /* PAE only */
+        case PGC_SH_type_to_index(PGC_SH_l2h_shadow):
+            if (level != 2)
+                res |= sh_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep));
+            break;
+#endif
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+        case PGC_SH_type_to_index(PGC_SH_l3_shadow):
+            if (level != 3)
+                res |= sh_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep));
+            break;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+        case PGC_SH_type_to_index(PGC_SH_l4_shadow):
+            if (level != 4)
+                res |= sh_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep));
+            break;
+#endif
+#endif
+        default:
+            SHADOW_ERROR("mfn %"SH_PRI_mfn" is shadowed in multiple "
+                         "modes: A&D bits may be out of sync (flags=%#x).\n",
+                         mfn_x(gmfn), pg->shadow_flags);
+            /* XXX Shadows in other modes will not be updated, so will
+             * have their A and D bits out of sync. */
+        }
+    }
+
+    /* We should never need to flush the TLB or recopy PAE entries */
+    ASSERT( res == 0 || res == SHADOW_SET_CHANGED );
+    return flags;
+}
+
+/**************************************************************************/
+/* Functions to compute the correct index into a shadow page, given an
+ * index into the guest page (as returned by guest_get_index()).
+ * This is trivial when the shadow and guest use the same sized PTEs, but
+ * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
+ * PAE- or 64-bit shadows).
+ *
+ * These functions also increment the shadow mfn, when necessary. When PTE
+ * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
+ * page. In this case, we allocate 2 contiguous pages for the shadow L1, and
+ * use simple pointer arithmetic on a pointer to the guest L1e to figure out
+ * which shadow page we really want. Similarly, when PTE sizes are
+ * mismatched, we shadow a guest L2 page with 4 shadow L2 pages. (The easiest
+ * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
+ * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
+ * space.)
+ *
+ * For PAE guests, for every 32-bytes of guest L3 page table, we use 64-bytes
+ * of shadow (to store both the shadow, and the info that would normally be
+ * stored in page_info fields). This arrangement allows the shadow and the
+ * "page_info" fields to always be stored in the same page (in fact, in
+ * the same cache line), avoiding an extra call to map_domain_page().
+ */
+
+static inline u32
+guest_index(void *ptr)
+/* Index of a guest PTE within its page, from a pointer into a mapping. */
+{
+    return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
+}
+
+static inline u32
+shadow_l1_index(mfn_t *smfn, u32 guest_index)
+/* Map a guest l1 index to a shadow l1 index, adjusting *smfn to the
+ * right page of a multi-page shadow when PTE sizes are mismatched
+ * (see the comment block above). */
+{
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+    /* 2-on-3+: one guest l1 is shadowed by 2 contiguous pages. */
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
+    return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
+#else
+    return guest_index;
+#endif
+}
+
+static inline u32
+shadow_l2_index(mfn_t *smfn, u32 guest_index)
+/* Map a guest l2 index to a shadow l2 index, adjusting *smfn for the
+ * 4-page shadow used in the 2-on-3+ case. */
+{
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+    // Because we use 2 shadow l2 entries for each guest entry, the number of
+    // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
+    //
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
+
+    // We multiply by two to get the index of the first of the two entries
+    // used to shadow the specified guest entry.
+    return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
+#else
+    return guest_index;
+#endif
+}
+
+#if GUEST_PAGING_LEVELS >= 3
+
+static inline u32
+shadow_l3_index(mfn_t *smfn, u32 guest_index)
+/* Map a guest l3 index to a shadow l3 index.  For PAE guests the shadow
+ * interleaves 4-entry shadow groups with pae_l3_bookkeeping structs, so
+ * the index is remapped into that doubled layout (see the comment block
+ * above). */
+{
+#if GUEST_PAGING_LEVELS == 3
+    u32 group_id;
+
+    // Because we use twice the space in L3 shadows as was consumed in guest
+    // L3s, the number of guest entries per shadow page is
+    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
+    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
+    //
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
+
+    // We store PAE L3 shadows in groups of 4, alternating shadows and
+    // pae_l3_bookkeeping structs.  So the effective shadow index is
+    // the group_id * 8 + the offset within the group.
+    //
+    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
+    group_id = guest_index / 4;
+    return (group_id * 8) + (guest_index % 4);
+#else
+    return guest_index;
+#endif
+}
+
+#endif // GUEST_PAGING_LEVELS >= 3
+
+#if GUEST_PAGING_LEVELS >= 4
+
+static inline u32
+shadow_l4_index(mfn_t *smfn, u32 guest_index)
+{
+ return guest_index;
+}
+
+#endif // GUEST_PAGING_LEVELS >= 4
+
+
+/**************************************************************************/
+/* Functions which compute shadow entries from their corresponding guest
+ * entries.
+ *
+ * These are the "heart" of the shadow code.
+ *
+ * There are two sets of these: those that are called on demand faults (read
+ * faults and write faults), and those that are essentially called to
+ * "prefetch" (or propagate) entries from the guest into the shadow. The read
+ * fault and write fault are handled as two separate cases for L1 entries (due
+ * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
+ * into the respective demand_fault functions.
+ */
+
+/* Debug-only assertion used by sh_propagate_flags(): on failure, print
+ * and make the caller return -1 (which propagates as sflags == -1 and
+ * trips the ASSERT(sflags != -1) in the callers).
+ * NOTE(review): the trailing ';' after "while (0)" defeats the usual
+ * do/while(0) macro idiom (CHECK in an if/else would misparse) — current
+ * call sites are all statement-position, so it is harmless here. */
+#define CHECK(_cond)                                    \
+do {                                                    \
+    if (unlikely(!(_cond)))                             \
+    {                                                   \
+        printk("%s %s %d ASSERTION (%s) FAILED\n",      \
+               __func__, __FILE__, __LINE__, #_cond);   \
+        return -1;                                      \
+    }                                                   \
+} while (0);
+
+// The function below tries to capture all of the flag manipulation for the
+// demand and propagate functions into one place.
+//
+// The function below tries to capture all of the flag manipulation for the
+// demand and propagate functions into one place.
+//
+// Computes the shadow PTE flags for a guest entry with flags @gflags at
+// pagetable @level, possibly setting guest A/D bits through
+// @guest_entry_ptr on demand fetches.  Returns the shadow flags, or the
+// special not-present signature for absent guest entries, or 0 when a
+// prefetch hits an unbacked gfn.  (The ASSERT(sflags != -1) in callers
+// pairs with CHECK's -1 return in debug builds.)
+//
+static always_inline u32
+sh_propagate_flags(struct vcpu *v, mfn_t target_mfn,
+                   u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn,
+                   int mmio, int level, fetch_type_t ft)
+{
+    struct domain *d = v->domain;
+    u32 pass_thru_flags;
+    u32 sflags;
+
+    // XXX -- might want to think about PAT support for HVM guests...
+
+#ifndef NDEBUG
+    // MMIO can only occur from L1e's
+    //
+    if ( mmio )
+        CHECK(level == 1);
+
+    // We should always have a pointer to the guest entry if it's a non-PSE
+    // non-MMIO demand access.
+    if ( ft & FETCH_TYPE_DEMAND )
+        CHECK(guest_entry_ptr || level == 1);
+#endif
+
+    // A not-present guest entry has a special signature in the shadow table,
+    // so that we do not have to consult the guest tables multiple times...
+    //
+    if ( unlikely(!(gflags & _PAGE_PRESENT)) )
+        return _PAGE_SHADOW_GUEST_NOT_PRESENT;
+
+    // Must have a valid target_mfn, unless this is mmio, or unless this is a
+    // prefetch.  In the case of a prefetch, an invalid mfn means that we can
+    // not usefully shadow anything, and so we return early.
+    //
+    if ( !valid_mfn(target_mfn) )
+    {
+        CHECK((ft == ft_prefetch) || mmio);
+        if ( !mmio )
+            return 0;
+    }
+
+    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
+    //
+    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
+        pass_thru_flags = _PAGE_PRESENT;
+    else
+    {
+        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+                           _PAGE_RW | _PAGE_PRESENT);
+        if ( guest_supports_nx(v) )
+            pass_thru_flags |= _PAGE_NX_BIT;
+    }
+
+    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
+    // L3e's; they are all implied.  So we emulate them here.
+    //
+    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
+        gflags = pass_thru_flags;
+
+    // Propagate bits from the guest to the shadow.
+    // Some of these may be overwritten, below.
+    // Since we know the guest's PRESENT bit is set, we also set the shadow's
+    // SHADOW_PRESENT bit.
+    //
+    sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
+
+    // Copy the guest's RW bit into the SHADOW_RW bit.
+    //
+    if ( gflags & _PAGE_RW )
+        sflags |= _PAGE_SHADOW_RW;
+
+    // Set the A&D bits for higher level shadows.
+    // Higher level entries do not, strictly speaking, have dirty bits, but
+    // since we use shadow linear tables, each of these entries may, at some
+    // point in time, also serve as a shadow L1 entry.
+    // By setting both the A&D bits in each of these, we eliminate the burden
+    // on the hardware to update these bits on initial accesses.
+    //
+    if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
+        sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
+
+
+    // Set the A and D bits in the guest entry, if we need to.
+    if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
+        gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
+
+    // If the A or D bit has not yet been set in the guest, then we must
+    // prevent the corresponding kind of access.
+    //
+    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
+                  !(gflags & _PAGE_ACCESSED)) )
+        sflags &= ~_PAGE_PRESENT;
+
+    /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */
+    if ( unlikely( ((level == 1)
+                    || ((level == 2) && (GUEST_PAGING_LEVELS < 4)
+                        && guest_supports_superpages(v) &&
+                        (gflags & _PAGE_PSE)))
+                   && !(gflags & _PAGE_DIRTY)) )
+        sflags &= ~_PAGE_RW;
+
+    // MMIO caching
+    //
+    // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit
+    // to cache the fact that this entry is in MMIO space.
+    //
+    if ( (level == 1) && mmio )
+    {
+        sflags &= ~(_PAGE_PRESENT);
+        sflags |= _PAGE_SHADOW_MMIO;
+    }
+    else
+    {
+        // shadow_mode_log_dirty support
+        //
+        // Only allow the guest write access to a page a) on a demand fault,
+        // or b) if the page is already marked as dirty.
+        //
+        if ( unlikely((level == 1) &&
+                      !(ft & FETCH_TYPE_WRITE) &&
+                      shadow_mode_log_dirty(d) &&
+                      !sh_mfn_is_dirty(d, target_mfn)) )
+        {
+            sflags &= ~_PAGE_RW;
+        }
+
+        // protect guest page tables
+        //
+        if ( unlikely((level == 1) &&
+                      sh_mfn_is_a_page_table(target_mfn)) )
+        {
+            if ( shadow_mode_trap_reads(d) )
+            {
+                // if we are trapping both reads & writes, then mark this page
+                // as not present...
+                //
+                sflags &= ~_PAGE_PRESENT;
+            }
+            else
+            {
+                // otherwise, just prevent any writes...
+                //
+                sflags &= ~_PAGE_RW;
+            }
+        }
+    }
+
+    return sflags;
+}
+
+#undef CHECK
+
+#if GUEST_PAGING_LEVELS >= 4
+/* Compute and write the shadow l4e (*sl4p) for guest entry *gl4e,
+ * pointing it at the l3 shadow sl3mfn.  Flag logic is delegated to
+ * sh_propagate_flags(); the guest-entry cast to l1e is safe because
+ * only flag bits are examined/written. */
+static void
+l4e_propagate_from_guest(struct vcpu *v,
+                         guest_l4e_t *gl4e,
+                         mfn_t gl4mfn,
+                         mfn_t sl3mfn,
+                         shadow_l4e_t *sl4p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l4e_get_flags(*gl4e);
+    u32 sflags = sh_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e,
+                                    gl4mfn, 0, 4, ft);
+
+    *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                 "%s gl4e=%" SH_PRI_gpte " sl4e=%" SH_PRI_pte "\n",
+                 fetch_type_names[ft], gl4e->l4, sl4p->l4);
+    ASSERT(sflags != -1);
+}
+#endif // GUEST_PAGING_LEVELS >= 4
+
+#if GUEST_PAGING_LEVELS >= 3
+/* Compute and write the shadow l3e (*sl3p) for guest entry *gl3e,
+ * pointing it at the l2 shadow sl2mfn.  See l4e_propagate_from_guest
+ * for the pattern. */
+static void
+l3e_propagate_from_guest(struct vcpu *v,
+                         guest_l3e_t *gl3e,
+                         mfn_t gl3mfn,
+                         mfn_t sl2mfn,
+                         shadow_l3e_t *sl3p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l3e_get_flags(*gl3e);
+    u32 sflags = sh_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e,
+                                    gl3mfn, 0, 3, ft);
+
+    *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                 "%s gl3e=%" SH_PRI_gpte " sl3e=%" SH_PRI_pte "\n",
+                 fetch_type_names[ft], gl3e->l3, sl3p->l3);
+    ASSERT(sflags != -1);
+}
+#endif // GUEST_PAGING_LEVELS >= 3
+
+/* Compute and write the shadow l2e (*sl2p) for guest entry *gl2e,
+ * pointing it at the l1 shadow sl1mfn.  See l4e_propagate_from_guest
+ * for the pattern. */
+static void
+l2e_propagate_from_guest(struct vcpu *v,
+                         guest_l2e_t *gl2e,
+                         mfn_t gl2mfn,
+                         mfn_t sl1mfn,
+                         shadow_l2e_t *sl2p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l2e_get_flags(*gl2e);
+    u32 sflags = sh_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e,
+                                    gl2mfn, 0, 2, ft);
+
+    *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                 "%s gl2e=%" SH_PRI_gpte " sl2e=%" SH_PRI_pte "\n",
+                 fetch_type_names[ft], gl2e->l2, sl2p->l2);
+    ASSERT(sflags != -1);
+}
+
+static inline int
+l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
+               int mmio)
+/* Build the shadow l1e for a demand read through the walk gw.
+ * Returns 1 if emulation is required (read-trapped pagetable page, in
+ * which case *sl1p is left empty), and 0 otherwise. */
+{
+    struct domain *d = v->domain;
+    u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
+                                    mmio, 1, ft_demand_read);
+
+    if ( shadow_mode_trap_reads(d) && !mmio && sh_mfn_is_a_page_table(gmfn) )
+    {
+        // emulation required!
+        *sl1p = shadow_l1e_empty();
+        return 1;
+    }
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                 "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                 (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+    return 0;
+}
+
+static inline int
+l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
+                int mmio)
+/* Build the shadow l1e for a demand write through the walk gw, marking
+ * the target frame dirty for log-dirty mode.
+ * Returns 1 if emulation is required (write to a pagetable page, in
+ * which case *sl1p is left empty), and 0 otherwise. */
+{
+    struct domain *d = v->domain;
+    u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
+                                    mmio, 1, ft_demand_write);
+
+    sh_mark_dirty(d, gmfn);
+
+    if ( !mmio && sh_mfn_is_a_page_table(gmfn) )
+    {
+        // emulation required!
+        *sl1p = shadow_l1e_empty();
+        return 1;
+    }
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                 "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                 (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+    return 0;
+}
+
+static inline void
+l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p,
+                         int mmio)
+/* Prefetch path: build the shadow l1e for guest entry gl1e with no
+ * demand fault (so no guest A/D update — note the NULL entry pointer
+ * and INVALID_MFN gmfn passed to sh_propagate_flags).  For mmio, the
+ * gfn is used directly as the "mfn". */
+{
+    gfn_t gfn = guest_l1e_get_gfn(gl1e);
+    mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn);
+    u32 gflags = guest_l1e_get_flags(gl1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN),
+                                    mmio, 1, ft_prefetch);
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                 "gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                 gl1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+}
+
+
+/**************************************************************************/
+/* These functions update shadow entries (and do bookkeeping on the shadow
+ * tables they are in). It is intended that they are the only
+ * functions which ever write (non-zero) data onto a shadow page.
+ *
+ * They return a set of flags:
+ * SHADOW_SET_CHANGED -- we actually wrote a new value to the shadow.
+ * SHADOW_SET_FLUSH -- the caller must cause a TLB flush.
+ * SHADOW_SET_ERROR -- the input is not a valid entry (for example, if
+ * shadow_get_page_from_l1e() fails).
+ * SHADOW_SET_L3PAE_RECOPY -- one or more vcpu's need to have their local
+ * copies of their PAE L3 entries re-copied.
+ */
+
+static inline void safe_write_entry(void *dst, void *src)
+/* Copy one PTE safely when processors might be running on the
+ * destination pagetable.  This does *not* give safety against
+ * concurrent writes (that's what the shadow lock is for), just
+ * stops the hardware picking up partially written entries.
+ * Requires dst to be entry-aligned (asserted below). */
+{
+    volatile unsigned long *d = dst;
+    unsigned long *s = src;
+    ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
+#if CONFIG_PAGING_LEVELS == 3
+    /* In PAE mode, pagetable entries are larger
+     * than machine words, so won't get written atomically.  We need to make
+     * sure any other cpu running on these shadows doesn't see a
+     * half-written entry.  Do this by marking the entry not-present first,
+     * then writing the high word before the low word. */
+    BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
+    /* Low word first: clearing it clears PRESENT, making the stale high
+     * word harmless until the final low-word write completes the entry. */
+    d[0] = 0;
+    d[1] = s[1];
+    d[0] = s[0];
+#else
+    /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
+     * which will be an atomic write, since the entry is aligned. */
+    BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
+    *d = *s;
+#endif
+}
+
+
+static inline void
+shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
+/* This function does the actual writes to shadow pages.
+ * It must not be called directly, since it doesn't do the bookkeeping
+ * that shadow_set_l*e() functions do.
+ * @d points into the linear map of shadow page @mfn; @s is the source
+ * array of @entries entries. */
+{
+    shadow_l1e_t *dst = d;
+    shadow_l1e_t *src = s;
+    void *map = NULL;
+    int i;
+
+    /* Because we mirror access rights at all levels in the shadow, an
+     * l2 (or higher) entry with the RW bit cleared will leave us with
+     * no write access through the linear map.
+     * We detect that by writing to the shadow with copy_to_user() and
+     * using map_domain_page() to get a writeable mapping if we need to. */
+    /* Probe: rewrite the first word with its own value; fails iff the
+     * linear mapping is not writeable. */
+    if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 )
+    {
+        perfc_incrc(shadow_linear_map_failed);
+        map = sh_map_domain_page(mfn);
+        ASSERT(map != NULL);
+        dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
+    }
+
+
+    for ( i = 0; i < entries; i++ )
+        safe_write_entry(dst++, src++);
+
+    if ( map != NULL ) sh_unmap_domain_page(map);
+
+    /* XXX TODO:
+     * Update min/max field in page_info struct of this mfn */
+}
+
+static inline int
+perms_strictly_increased(u32 old_flags, u32 new_flags)
+/* Given the flags of two entries, are the new flags a strict
+ * increase in rights over the old ones?  Used to decide whether an
+ * entry update can skip a TLB flush (stale lesser rights are safe). */
+{
+    u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
+    u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
+    /* Flip the NX bit, since it's the only one that decreases rights;
+     * we calculate as if it were an "X" bit. */
+    of ^= _PAGE_NX_BIT;
+    nf ^= _PAGE_NX_BIT;
+    /* If the changed bits are all set in the new flags, then rights strictly
+     * increased between old and new. */
+    return ((of | (of ^ nf)) == nf);
+}
+
+static int inline
+shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+/* Take the reference counts implied by installing sl1e in a shadow l1,
+ * stripping the shadow-private flag bits first.  Returns non-zero on
+ * success, 0 on failure (caller must not install the entry).  Domains
+ * without refcounted shadows trivially succeed. */
+{
+    int res;
+    mfn_t mfn;
+    struct domain *owner;
+    shadow_l1e_t sanitized_sl1e =
+        shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT);
+
+    //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT);
+    //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0);
+
+    if ( !shadow_mode_refcounts(d) )
+        return 1;
+
+    res = get_page_from_l1e(sanitized_sl1e, d);
+
+    // If a privileged domain is attempting to install a map of a page it does
+    // not own, we let it succeed anyway.
+    //
+    if ( unlikely(!res) &&
+         IS_PRIV(d) &&
+         !shadow_mode_translate(d) &&
+         valid_mfn(mfn = shadow_l1e_get_mfn(sl1e)) &&
+         (owner = page_get_owner(mfn_to_page(mfn))) &&
+         (d != owner) )
+    {
+        /* Retry the refcount against the page's real owner. */
+        res = get_page_from_l1e(sanitized_sl1e, owner);
+        SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
+                       "which is owned by domain %d: %s\n",
+                       d->domain_id, mfn_x(mfn), owner->domain_id,
+                       res ? "success" : "failed");
+    }
+
+    if ( unlikely(!res) )
+    {
+        perfc_incrc(shadow_get_page_fail);
+        /* Fix: the format string has a conversion but the original code
+         * passed no argument for it (undefined behavior) — supply the
+         * entry's raw value. */
+        SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n", sl1e.l1);
+    }
+
+    return res;
+}
+
+static void inline
+shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+/* Drop the reference counts taken by shadow_get_page_from_l1e().
+ * No-op for domains without refcounted shadows, matching the get side. */
+{
+    if ( !shadow_mode_refcounts(d) )
+        return;
+
+    put_page_from_l1e(sl1e, d);
+}
+
+#if GUEST_PAGING_LEVELS >= 4
+/* Install new_sl4e at *sl4e (which lives in shadow page sl4mfn), doing
+ * the shadow refcount bookkeeping: take a ref on the new target before
+ * writing, drop the old target's ref after.  Returns SHADOW_SET_* flags
+ * (see the comment block above); SHADOW_SET_FLUSH is reported unless
+ * the change strictly increased permissions on the same mfn. */
+static int shadow_set_l4e(struct vcpu *v,
+                          shadow_l4e_t *sl4e,
+                          shadow_l4e_t new_sl4e,
+                          mfn_t sl4mfn)
+{
+    int flags = 0;
+    shadow_l4e_t old_sl4e;
+    paddr_t paddr;
+    ASSERT(sl4e != NULL);
+    old_sl4e = *sl4e;
+
+    if ( old_sl4e.l4 == new_sl4e.l4 ) return 0;  /* Nothing to do */
+
+    /* Physical address of this entry, used as the shadow's up-pointer. */
+    paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
+             | (((unsigned long)sl4e) & ~PAGE_MASK));
+
+    if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
+    {
+        /* About to install a new reference */
+        sh_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr);
+    }
+
+    /* Write the new entry */
+    shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
+        if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
+             || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
+                                          shadow_l4e_get_flags(new_sl4e)) )
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl3mfn, paddr);
+    }
+    return flags;
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+#if GUEST_PAGING_LEVELS >= 3
+/* Install new_sl3e at *sl3e (in shadow page sl3mfn), with the same
+ * refcounting pattern as shadow_set_l4e.  For PAE guests, additionally
+ * flags any vcpus whose low-memory / linear copies of this l3 must be
+ * re-copied (SHADOW_SET_L3PAE_RECOPY). */
+static int shadow_set_l3e(struct vcpu *v,
+                          shadow_l3e_t *sl3e,
+                          shadow_l3e_t new_sl3e,
+                          mfn_t sl3mfn)
+{
+    int flags = 0;
+    shadow_l3e_t old_sl3e;
+    paddr_t paddr;
+    ASSERT(sl3e != NULL);
+    old_sl3e = *sl3e;
+
+    if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
+
+    /* Physical address of this entry, used as the shadow's up-pointer. */
+    paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
+             | (((unsigned long)sl3e) & ~PAGE_MASK));
+
+    if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
+    {
+        /* About to install a new reference */
+        sh_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr);
+    }
+
+    /* Write the new entry */
+    shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+#if GUEST_PAGING_LEVELS == 3
+    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
+     * the linear pagetable entries of its l2s, and may also be copied
+     * to a low memory location to make it fit in CR3.  Report that we
+     * need to resync those copies (we can't wait for the guest to flush
+     * the TLB because it might be an increase in rights). */
+    {
+        struct vcpu *vcpu;
+
+        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
+        for_each_vcpu(v->domain, vcpu)
+        {
+            if (info->vcpus & (1 << vcpu->vcpu_id))
+            {
+                // Remember that this flip/update needs to occur.
+                vcpu->arch.shadow.pae_flip_pending = 1;
+                flags |= SHADOW_SET_L3PAE_RECOPY;
+            }
+        }
+    }
+#endif
+
+    if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
+        if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
+             !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
+                                       shadow_l3e_get_flags(new_sl3e)) )
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl2mfn, paddr);
+    }
+    return flags;
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+/* Install new_sl2e into the shadow l2 slot *sl2e (in shadow table sl2mfn),
+ * maintaining refcounts on the l1 shadow it points at.  In 2-on-3/2-on-4
+ * modes l1 shadows are two pages and are installed as a pair of entries.
+ * Returns a bitmap of SHADOW_SET_* flags. */
+static int shadow_set_l2e(struct vcpu *v,
+                          shadow_l2e_t *sl2e,
+                          shadow_l2e_t new_sl2e,
+                          mfn_t sl2mfn)
+{
+    int flags = 0;
+    shadow_l2e_t old_sl2e;
+    paddr_t paddr;
+
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+    /* In 2-on-3 we work with pairs of l2es pointing at two-page
+     * shadows. Reference counting and up-pointers track from the first
+     * page of the shadow to the first l2e, so make sure that we're
+     * working with those:
+     * Align the pointer down so it's pointing at the first of the pair */
+    sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
+    /* Align the mfn of the shadow entry too */
+    new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
+#endif
+
+    ASSERT(sl2e != NULL);
+    old_sl2e = *sl2e;
+
+    if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
+
+    /* Physical address of this entry: refcounting backlink. */
+    paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
+             | (((unsigned long)sl2e) & ~PAGE_MASK));
+
+    if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
+    {
+        /* About to install a new reference */
+        sh_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr);
+    }
+
+    /* Write the new entry */
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+    {
+        shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
+        /* The l1 shadow is two pages long and need to be pointed to by
+         * two adjacent l1es. The pair have the same flags, but point
+         * at odd and even MFNs */
+        ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
+        pair[1].l2 |= (1<<PAGE_SHIFT);
+        shadow_write_entries(sl2e, &pair, 2, sl2mfn);
+    }
+#else /* normal case */
+    shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
+#endif
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
+        if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
+             !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
+                                       shadow_l2e_get_flags(new_sl2e)) )
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl1mfn, paddr);
+    }
+    return flags;
+}
+
+/* Install new_sl1e into the shadow l1 slot *sl1e (in shadow table sl1mfn).
+ * For refcounting domains this takes a typed reference on the target
+ * frame; if that fails the entry is installed as empty and
+ * SHADOW_SET_ERROR is reported.  Returns a bitmap of SHADOW_SET_* flags. */
+static int shadow_set_l1e(struct vcpu *v,
+                          shadow_l1e_t *sl1e,
+                          shadow_l1e_t new_sl1e,
+                          mfn_t sl1mfn)
+{
+    int flags = 0;
+    struct domain *d = v->domain;
+    shadow_l1e_t old_sl1e;
+    ASSERT(sl1e != NULL);
+
+    old_sl1e = *sl1e;
+
+    if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
+
+    if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT )
+    {
+        /* About to install a new reference */
+        if ( shadow_mode_refcounts(d) ) {
+            if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 )
+            {
+                /* Doesn't look like a pagetable. */
+                flags |= SHADOW_SET_ERROR;
+                new_sl1e = shadow_l1e_empty();
+            }
+        }
+    }
+
+    /* Write the new entry */
+    shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
+    {
+        /* We lost a reference to an old mfn. */
+        /* N.B. Unlike higher-level sets, never need an extra flush
+         * when writing an l1e. Because it points to the same guest frame
+         * as the guest l1e did, it's the guest's responsibility to
+         * trigger a flush later. */
+        if ( shadow_mode_refcounts(d) )
+        {
+            shadow_put_page_from_l1e(old_sl1e, d);
+        }
+    }
+    return flags;
+}
+
+
+/**************************************************************************/
+/* These functions take a vcpu and a virtual address, and return a pointer
+ * to the appropriate level N entry from the shadow tables.
+ * If the necessary tables are not present in the shadow, they return NULL. */
+
+/* N.B. The use of GUEST_PAGING_LEVELS here is correct. If the shadow has
+ * more levels than the guest, the upper levels are always fixed and do not
+ * reflect any information from the guest, so we do not use these functions
+ * to access them. */
+
+#if GUEST_PAGING_LEVELS >= 4
+/* Return a pointer to the shadow l4e for va, via the shadow linear map.
+ * Never NULL: the top-level shadow table always exists. */
+static shadow_l4e_t *
+shadow_get_l4e(struct vcpu *v, unsigned long va)
+{
+    /* Reading the top level table is always valid. */
+    return sh_linear_l4_table(v) + shadow_l4_linear_offset(va);
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#if GUEST_PAGING_LEVELS >= 3
+/* Return a pointer to the shadow l3e for va, or NULL if no l3 shadow
+ * is reachable (64-bit only; the PAE top level is always mapped). */
+static shadow_l3e_t *
+shadow_get_l3e(struct vcpu *v, unsigned long va)
+{
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
+    /* Get the l4 */
+    shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
+    ASSERT(sl4e != NULL);
+    if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
+    /* l4 was present; OK to get the l3 */
+    return sh_linear_l3_table(v) + shadow_l3_linear_offset(va);
+#else /* PAE... */
+    /* Top level is always mapped */
+    ASSERT(v->arch.shadow_vtable);
+    return ((shadow_l3e_t *)v->arch.shadow_vtable) + shadow_l3_linear_offset(va);
+#endif
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+
+/* Return a pointer to the shadow l2e for va, or NULL if no l2 shadow
+ * is reachable from the upper shadow levels. */
+static shadow_l2e_t *
+shadow_get_l2e(struct vcpu *v, unsigned long va)
+{
+#if GUEST_PAGING_LEVELS >= 3 /* 64bit/PAE... */
+    /* Get the l3 */
+    shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
+    if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
+    /* l3 was present; OK to get the l2 */
+#endif
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(va);
+}
+
+
+#if 0 // avoid the compiler warning for now...
+
+/* Return a pointer to the shadow l1e for va, or NULL if no l1 shadow
+ * is reachable.  Currently unused (compiled out to avoid an
+ * unused-function warning). */
+static shadow_l1e_t *
+shadow_get_l1e(struct vcpu *v, unsigned long va)
+{
+    /* Get the l2 */
+    shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
+    if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
+    /* l2 was present; OK to get the l1 */
+    return sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
+}
+
+#endif
+
+
+/**************************************************************************/
+/* Macros to walk pagetables. These take the shadow of a pagetable and
+ * walk every "interesting" entry. That is, they don't touch Xen mappings,
+ * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every
+ * second entry (since pairs of entries are managed together). For multi-page
+ * shadows they walk all pages.
+ *
+ * Arguments are an MFN, the variable to point to each entry, a variable
+ * to indicate that we are done (we will shortcut to the end of the scan
+ * when _done != 0), a variable to indicate that we should avoid Xen mappings,
+ * and the code.
+ *
+ * WARNING: These macros have side-effects. They change the values of both
+ * the pointer and the MFN. */
+
+/* Advance a guest-entry cursor by one entry.  The argument is a pointer
+ * to a guest_l1e_t* cursor (all guest entry sizes are the same, so one
+ * helper serves every level); a NULL argument means "no cursor". */
+static inline void increment_ptr_to_guest_entry(void *ptr)
+{
+    guest_l1e_t **cursor;
+
+    if ( ptr == NULL )
+        return;
+
+    cursor = ptr;
+    (*cursor)++;
+}
+
+/* All kinds of l1: touch all entries.  Maps the shadow page, runs _code
+ * on each present sl1e, advancing the guest-entry cursor in step.
+ * NB: mutates _sl1e and (via _gl1p) the guest cursor -- see WARNING above. */
+#define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)        \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l1e_t *_sp = map_shadow_page((_sl1mfn));                     \
+    ASSERT((mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l1_shadow                                          \
+           || (mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask)     \
+           == PGC_SH_fl1_shadow);                                       \
+    for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl1e) = _sp + _i;                                             \
+        if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl1p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+/* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow.
+ * The mfn is advanced to the second page, and _done from the first page
+ * short-circuits the second. */
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done,  _code)        \
+do {                                                                    \
+    int __done = 0;                                                     \
+    _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                          \
+                         ({ (__done = _done); }), _code);               \
+    _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1);                                 \
+    if ( !__done )                                                      \
+        _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                      \
+                             ({ (__done = _done); }), _code);           \
+} while (0)
+#else /* Everything else; l1 shadows are only one page */
+#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)         \
+        _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
+#endif
+
+
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+
+/* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen.
+ * (Entries come in pairs in this mode, so stepping by 2 visits each pair
+ * once; _xen non-zero skips slots at/above HYPERVISOR_VIRT_START.) */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)   \
+do {                                                                    \
+    int _i, _j, __done = 0;                                             \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l2_32_shadow);                                     \
+    for ( _j = 0; _j < 4 && !__done; _j++ )                             \
+    {                                                                   \
+        shadow_l2e_t *_sp = map_shadow_page(_sl2mfn);                   \
+        for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 )       \
+            if ( (!(_xen))                                              \
+                 || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i)           \
+                 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
+            {                                                           \
+                (_sl2e) = _sp + _i;                                     \
+                if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )   \
+                    {_code}                                             \
+                if ( (__done = (_done)) ) break;                        \
+                increment_ptr_to_guest_entry(_gl2p);                    \
+            }                                                           \
+        unmap_shadow_page(_sp);                                         \
+        _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1);                             \
+    }                                                                   \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 2
+
+/* 32-bit on 32-bit: avoid Xen entries */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)   \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                     \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l2_32_shadow);                                     \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )              \
+        if ( (!(_xen))                                                  \
+             ||                                                         \
+             (_i < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
+        {                                                               \
+            (_sl2e) = _sp + _i;                                         \
+            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )       \
+                {_code}                                                 \
+            if ( _done ) break;                                         \
+            increment_ptr_to_guest_entry(_gl2p);                        \
+        }                                                               \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 3
+
+/* PAE: if it's an l2h, don't touch Xen mappings.
+ * (Xen lives in the top quarter of the fourth l2; the arithmetic below
+ * offsets _i by three tables' worth of slots to test against it.) */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)   \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                     \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l2_pae_shadow                                      \
+           || (mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)     \
+           == PGC_SH_l2h_pae_shadow);                                   \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )              \
+        if ( (!(_xen))                                                  \
+             || ((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)  \
+                 != PGC_SH_l2h_pae_shadow)                              \
+             || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES))               \
+                 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
+        {                                                               \
+            (_sl2e) = _sp + _i;                                         \
+            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )       \
+                {_code}                                                 \
+            if ( _done ) break;                                         \
+            increment_ptr_to_guest_entry(_gl2p);                        \
+        }                                                               \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#else
+
+/* 64-bit l2: touch all entries */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)   \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                     \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l2_64_shadow);                                     \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl2e) = _sp + _i;                                             \
+        if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl2p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#endif /* different kinds of l2 */
+
+#if GUEST_PAGING_LEVELS == 3
+
+/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es).
+ * A subshadow is a 4-entry group; _sl3e must point at its first entry. */
+#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)              \
+do {                                                                    \
+    int _i;                                                             \
+    for ( _i = 0; _i < 4; _i++ )                                        \
+    {                                                                   \
+        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        _sl3e++;                                                        \
+        increment_ptr_to_guest_entry(_gl3p);                            \
+    }                                                                   \
+} while (0)
+
+/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows.
+ * Subshadows with a zero refcount are skipped, but the guest cursor is
+ * still advanced past their four entries to stay in step. */
+#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)         \
+do {                                                                    \
+    int _i, _j, _k, __done = 0;                                         \
+    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l3_pae_shadow);                                    \
+    /* The subshadows are split, 64 on each page of the shadow */       \
+    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
+    {                                                                   \
+        void *_sp = sh_map_domain_page(_sl3mfn);                        \
+        for ( _i = 0; _i < 64; _i++ )                                   \
+        {                                                               \
+            /* Every second 32-byte region is a bookkeeping entry */    \
+            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
+            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
+                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                    \
+                                       ({ __done = (_done); __done; }), \
+                                       _code);                          \
+            else                                                        \
+                for ( _k = 0 ; _k < 4 ; _k++ )                          \
+                    increment_ptr_to_guest_entry(_gl3p);                \
+            if ( __done ) break;                                        \
+        }                                                               \
+        sh_unmap_domain_page(_sp);                                      \
+        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
+    }                                                                   \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 4
+
+/* 64-bit l3: touch all entries */
+#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)         \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l3e_t *_sp = map_shadow_page((_sl3mfn));                     \
+    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l3_64_shadow);                                     \
+    for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl3e) = _sp + _i;                                             \
+        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl3p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+/* 64-bit l4: avoid Xen mappings */
+#define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _xen, _code)   \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l4e_t *_sp = map_shadow_page((_sl4mfn));                     \
+    ASSERT((mfn_to_page(_sl4mfn)->count_info & PGC_SH_type_mask)        \
+           == PGC_SH_l4_64_shadow);                                     \
+    for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        if ( (!(_xen)) || is_guest_l4_slot(_i) )                        \
+        {                                                               \
+            (_sl4e) = _sp + _i;                                         \
+            if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT )       \
+                {_code}                                                 \
+            if ( _done ) break;                                         \
+        }                                                               \
+        increment_ptr_to_guest_entry(_gl4p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#endif
+
+
+
+/**************************************************************************/
+/* Functions to install Xen mappings and linear mappings in shadow pages */
+
+static mfn_t sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
+
+// XXX -- this function should probably be moved to shadow-common.c, but that
+// probably wants to wait until the shadow types have been moved from
+// shadow-types.h to shadow-private.h
+//
+#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
+/* Populate the Xen-private slots of a new l4 shadow (sl4mfn) for the
+ * l4 guest table gl4mfn: common Xen mappings, per-domain mappings, the
+ * guest and shadow linear maps, and (translated guests) the p2m table. */
+void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+    struct domain *d = v->domain;
+    shadow_l4e_t *sl4e;
+
+    sl4e = sh_map_domain_page(sl4mfn);
+    ASSERT(sl4e != NULL);
+    ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
+
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
+        shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
+                            __PAGE_HYPERVISOR);
+
+    /* Linear mapping */
+    /* LINEAR_PT maps the guest's tables; SH_LINEAR_PT maps the shadows. */
+    sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+    sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(v->domain) )
+    {
+        /* install domain-specific P2M table */
+        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
+            shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
+                                __PAGE_HYPERVISOR);
+    }
+
+    sh_unmap_domain_page(sl4e);
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+// For 3-on-3 PV guests, we need to make sure the xen mappings are in
+// place, which means that we need to populate the l2h entry in the l3
+// table.
+
+/* Populate the Xen-private slots of a "high" l2 shadow (sl2hmfn):
+ * common Xen mappings, per-domain mappings, and (translated guests)
+ * the p2m table.  No linear map yet -- see the comment below. */
+void sh_install_xen_entries_in_l2h(struct vcpu *v,
+                                    mfn_t sl2hmfn)
+{
+    struct domain *d = v->domain;
+    shadow_l2e_t *sl2e;
+    int i;
+
+    sl2e = sh_map_domain_page(sl2hmfn);
+    ASSERT(sl2e != NULL);
+    ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            shadow_l2e_from_mfn(
+                page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
+                __PAGE_HYPERVISOR);
+
+    /* We don't set up a linear mapping here because we can't until this
+     * l2h is installed in an l3e. sh_update_linear_entries() handles
+     * the linear mappings when the l3 is loaded. */
+
+    if ( shadow_mode_translate(d) )
+    {
+        /* Install the domain-specific p2m table */
+        l3_pgentry_t *p2m;
+        ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+        p2m = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+        for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+        {
+            sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
+                shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
+                                    __PAGE_HYPERVISOR);
+        }
+        sh_unmap_domain_page(p2m);
+    }
+
+    sh_unmap_domain_page(sl2e);
+}
+
+/* Ensure slot 3 of a new PAE l3 shadow points at an l2h shadow of the
+ * guest's top l2, so the Xen mappings are reachable.  Makes the l2h
+ * shadow on demand if it doesn't already exist. */
+void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
+{
+    shadow_l3e_t *sl3e;
+    guest_l3e_t *gl3e = v->arch.guest_vtable;
+    shadow_l3e_t new_sl3e;
+    gfn_t l2gfn;
+    mfn_t l2gmfn, l2smfn;
+    int r;
+
+    ASSERT(!shadow_mode_external(v->domain));
+    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
+    l2gfn = guest_l3e_get_gfn(gl3e[3]);
+    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
+    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
+    if ( !valid_mfn(l2smfn) )
+    {
+        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
+    }
+    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
+                             ft_prefetch);
+    sl3e = sh_map_domain_page(sl3mfn);
+    /* NOTE(review): the SHADOW_SET_* result of shadow_set_l3e (flush /
+     * PAE-recopy flags) is discarded here -- presumably safe because the
+     * table is still being constructed; confirm. */
+    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
+    sh_unmap_domain_page(sl3e);
+}
+#endif
+
+
+#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
+/* Populate the Xen-private slots of a new 2-level l2 shadow (sl2mfn)
+ * for guest table gl2mfn: common Xen mappings, per-domain mappings,
+ * linear maps, and (translated guests) the p2m table. */
+void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+    struct domain *d = v->domain;
+    shadow_l2e_t *sl2e;
+    int i;
+
+    sl2e = sh_map_domain_page(sl2mfn);
+    ASSERT(sl2e != NULL);
+    ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            shadow_l2e_from_mfn(
+                page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
+                __PAGE_HYPERVISOR);
+
+    /* Linear mapping */
+    /* LINEAR_PT maps the guest's tables; SH_LINEAR_PT maps the shadows. */
+    sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
+    sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(d) )
+    {
+        /* install domain-specific P2M table */
+        sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START)] =
+            shadow_l2e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
+                                __PAGE_HYPERVISOR);
+    }
+
+    sh_unmap_domain_page(sl2e);
+}
+#endif
+
+
+
+
+
+/**************************************************************************/
+/* Create a shadow of a given guest page.
+ */
+/* Allocate and initialise a shadow of the given type for guest page
+ * gmfn: installs Xen mappings for non-external root shadows, promotes
+ * the guest page, and records the shadow in the hash.  Returns the
+ * mfn of the new shadow. */
+static mfn_t
+sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
+{
+    mfn_t smfn = shadow_alloc(v->domain, shadow_type, mfn_x(gmfn));
+    SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
+                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
+
+    if ( shadow_type != PGC_SH_guest_root_type )
+        /* Lower-level shadow, not yet linked from a higher level */
+        mfn_to_page(smfn)->up = 0;
+
+    // Create the Xen mappings...
+    if ( !shadow_mode_external(v->domain) )
+    {
+        switch (shadow_type)
+        {
+#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
+        case PGC_SH_l4_shadow:
+            sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
+#endif
+#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+        case PGC_SH_l3_shadow:
+            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
+        case PGC_SH_l2h_shadow:
+            sh_install_xen_entries_in_l2h(v, smfn); break;
+#endif
+#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
+        case PGC_SH_l2_shadow:
+            sh_install_xen_entries_in_l2(v, gmfn, smfn); break;
+#endif
+        default: /* Do nothing */ break;
+        }
+    }
+
+    shadow_promote(v, gmfn, shadow_type);
+    set_shadow_status(v, gmfn, shadow_type, smfn);
+
+    return smfn;
+}
+
+/* Make a splintered superpage shadow: an fl1 shadow keyed by the
+ * superpage's gfn rather than by a backing guest l1 mfn (there is none).
+ * Records it in the fl1 hash and returns its mfn. */
+static mfn_t
+make_fl1_shadow(struct vcpu *v, gfn_t gfn)
+{
+    mfn_t smfn = shadow_alloc(v->domain, PGC_SH_fl1_shadow,
+                               (unsigned long) gfn_x(gfn));
+
+    SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n",
+                  gfn_x(gfn), mfn_x(smfn));
+
+    set_fl1_shadow_status(v, gfn, smfn);
+    return smfn;
+}
+
+
+#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
+/* Build the monitor pagetable for this vcpu: the table Xen itself runs
+ * on while the vcpu's shadows are active.  The shape depends on
+ * CONFIG_PAGING_LEVELS; returns the mfn of the top-level monitor table. */
+mfn_t
+sh_make_monitor_table(struct vcpu *v)
+{
+
+    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+
+#if CONFIG_PAGING_LEVELS == 4
+    {
+        struct domain *d = v->domain;
+        mfn_t m4mfn;
+        m4mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+        /* Remember the level of this table */
+        mfn_to_page(m4mfn)->shadow_flags = 4;
+#if SHADOW_PAGING_LEVELS < 4
+        // Install a monitor l3 table in slot 0 of the l4 table.
+        // This is used for shadow linear maps.
+        {
+            mfn_t m3mfn;
+            l4_pgentry_t *l4e;
+            m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+            mfn_to_page(m3mfn)->shadow_flags = 3;
+            l4e = sh_map_domain_page(m4mfn);
+            l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(l4e);
+        }
+#endif /* SHADOW_PAGING_LEVELS < 4 */
+        return m4mfn;
+    }
+
+#elif CONFIG_PAGING_LEVELS == 3
+
+    {
+        struct domain *d = v->domain;
+        mfn_t m3mfn, m2mfn;
+        l3_pgentry_t *l3e;
+        l2_pgentry_t *l2e;
+        int i;
+
+        m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        /* Remember the level of this table */
+        mfn_to_page(m3mfn)->shadow_flags = 3;
+
+        // Install a monitor l2 table in slot 3 of the l3 table.
+        // This is used for all Xen entries, including linear maps
+        m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        mfn_to_page(m2mfn)->shadow_flags = 2;
+        l3e = sh_map_domain_page(m3mfn);
+        l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+        sh_install_xen_entries_in_l2h(v, m2mfn);
+        /* Install the monitor's own linear map */
+        l2e = sh_map_domain_page(m2mfn);
+        for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+                (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
+                ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
+                : l2e_empty();
+        sh_unmap_domain_page(l2e);
+        sh_unmap_domain_page(l3e);
+
+        SHADOW_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+        return m3mfn;
+    }
+
+#elif CONFIG_PAGING_LEVELS == 2
+
+    {
+        struct domain *d = v->domain;
+        mfn_t m2mfn;
+        m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        sh_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+        /* Remember the level of this table */
+        mfn_to_page(m2mfn)->shadow_flags = 2;
+        return m2mfn;
+    }
+
+#else
+#error this should not happen
+#endif /* CONFIG_PAGING_LEVELS */
+}
+#endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
+
+/**************************************************************************/
+/* These functions also take a virtual address and return the level-N
+ * shadow table mfn and entry, but they create the shadow pagetables if
+ * they are needed. The "demand" argument is non-zero when handling
+ * a demand fault (so we know what to do about accessed bits &c).
+ * If the necessary tables are not present in the guest, they return NULL. */
+#if GUEST_PAGING_LEVELS >= 4
+/* Return a pointer to the shadow l4e for gw->va, storing the l4 shadow's
+ * mfn in *sl4mfn.  Never fails: the top-level shadow always exists. */
+static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
+                                                walk_t *gw,
+                                                mfn_t *sl4mfn)
+{
+    /* There is always a shadow of the top level table. Get it. */
+    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* Reading the top level table is always valid. */
+    return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#if GUEST_PAGING_LEVELS >= 3
+/* Return a pointer to the shadow l3e for gw->va, creating the l3 shadow
+ * and linking it into the l4 shadow if necessary (64-bit); on PAE the
+ * top level always exists.  Stores the l3 shadow's mfn in *sl3mfn.
+ * Returns NULL only if the guest has no l3 page. */
+static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
+                                                walk_t *gw,
+                                                mfn_t *sl3mfn,
+                                                fetch_type_t ft)
+{
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
+    mfn_t sl4mfn;
+    shadow_l4e_t *sl4e;
+    if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
+    /* Get the l4e */
+    sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
+    ASSERT(sl4e != NULL);
+    if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
+    {
+        *sl3mfn = shadow_l4e_get_mfn(*sl4e);
+        ASSERT(valid_mfn(*sl3mfn));
+    }
+    else
+    {
+        int r;
+        shadow_l4e_t new_sl4e;
+        /* No l3 shadow installed: find and install it. */
+        *sl3mfn = get_shadow_status(v, gw->l3mfn, PGC_SH_l3_shadow);
+        if ( !valid_mfn(*sl3mfn) )
+        {
+            /* No l3 shadow of this page exists at all: make one. */
+            *sl3mfn = sh_make_shadow(v, gw->l3mfn, PGC_SH_l3_shadow);
+        }
+        /* Install the new sl3 table in the sl4e */
+        l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn,
+                                 *sl3mfn, &new_sl4e, ft);
+        r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+    }
+    /* Now follow it down a level. Guaranteed to succeed. */
+    return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
+#else /* PAE... */
+    /* There is always a shadow of the top level table. Get it. */
+    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* This next line is important: the shadow l3 table is in an 8k
+     * shadow and we need to return the right mfn of the pair. This call
+     * will set it for us as a side-effect. */
+    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
+    ASSERT(v->arch.shadow_vtable);
+    return ((shadow_l3e_t *)v->arch.shadow_vtable)
+        + shadow_l3_table_offset(gw->va);
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+
+/* Return a pointer to the shadow l2e for gw->va, creating the l2 shadow
+ * and linking it into the l3 shadow if necessary.  Stores the l2
+ * shadow's mfn in *sl2mfn.  Returns NULL only if the guest has no
+ * l2 page. */
+static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
+                                                walk_t *gw,
+                                                mfn_t *sl2mfn,
+                                                fetch_type_t ft)
+{
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
+    mfn_t sl3mfn = _mfn(INVALID_MFN);
+    shadow_l3e_t *sl3e;
+    if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
+    /* Get the l3e */
+    sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
+    ASSERT(sl3e != NULL); /* Since we know guest PT is valid this far */
+    if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
+    {
+        *sl2mfn = shadow_l3e_get_mfn(*sl3e);
+        ASSERT(valid_mfn(*sl2mfn));
+    }
+    else
+    {
+        int r;
+        shadow_l3e_t new_sl3e;
+        /* No l2 shadow installed: find and install it. */
+        *sl2mfn = get_shadow_status(v, gw->l2mfn, PGC_SH_l2_shadow);
+        if ( !valid_mfn(*sl2mfn) )
+        {
+            /* No l2 shadow of this page exists at all: make one. */
+            *sl2mfn = sh_make_shadow(v, gw->l2mfn, PGC_SH_l2_shadow);
+        }
+        /* Install the new sl2 table in the sl3e */
+        l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn,
+                                 *sl2mfn, &new_sl3e, ft);
+        r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+#if GUEST_PAGING_LEVELS == 3
+        /* Need to sync up the linear maps, as we are about to use them */
+        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
+        sh_pae_recopy(v->domain);
+#endif
+    }
+    /* Now follow it down a level. Guaranteed to succeed. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#else /* 32bit... */
+    /* There is always a shadow of the top level table. Get it. */
+    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* This next line is important: the guest l2 has a 16k
+     * shadow, we need to return the right mfn of the four. This
+     * call will set it for us as a side-effect. */
+    (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
+    /* Reading the top level table is always valid. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#endif
+}
+
+
+/* Return a pointer to the shadow l1e for gw->va, creating the l1 (or
+ * fl1, for guest superpages) shadow and linking it into the l2 shadow
+ * if necessary.  Stores the l1 shadow's mfn in *sl1mfn.  Returns NULL
+ * if the guest has no usable l2e/l1 page. */
+static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v,
+                                                walk_t *gw,
+                                                mfn_t *sl1mfn,
+                                                fetch_type_t ft)
+{
+    mfn_t sl2mfn;
+    shadow_l2e_t *sl2e;
+
+    /* Get the l2e */
+    sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
+    if ( sl2e == NULL ) return NULL;
+    if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
+    {
+        *sl1mfn = shadow_l2e_get_mfn(*sl2e);
+        ASSERT(valid_mfn(*sl1mfn));
+    }
+    else
+    {
+        shadow_l2e_t new_sl2e;
+        int r, flags = guest_l2e_get_flags(*gw->l2e);
+        /* No l1 shadow installed: find and install it. */
+        if ( !(flags & _PAGE_PRESENT) )
+            return NULL; /* No guest page. */
+        if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
+        {
+            /* Splintering a superpage */
+            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
+            *sl1mfn = get_fl1_shadow_status(v, l2gfn);
+            if ( !valid_mfn(*sl1mfn) )
+            {
+                /* No fl1 shadow of this superpage exists at all: make one. */
+                *sl1mfn = make_fl1_shadow(v, l2gfn);
+            }
+        }
+        else
+        {
+            /* Shadowing an actual guest l1 table */
+            /* NOTE(review): this tests gw->l2mfn but the lines below use
+             * gw->l1mfn -- looks like it should test l1mfn; confirm
+             * against the walk_t contract before changing. */
+            if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
+            *sl1mfn = get_shadow_status(v, gw->l1mfn, PGC_SH_l1_shadow);
+            if ( !valid_mfn(*sl1mfn) )
+            {
+                /* No l1 shadow of this page exists at all: make one. */
+                *sl1mfn = sh_make_shadow(v, gw->l1mfn, PGC_SH_l1_shadow);
+            }
+        }
+        /* Install the new sl1 table in the sl2e */
+        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn,
+                                 *sl1mfn, &new_sl2e, ft);
+        r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+        /* This next line is important: in 32-on-PAE and 32-on-64 modes,
+         * the guest l1 table has an 8k shadow, and we need to return
+         * the right mfn of the pair. This call will set it for us as a
+         * side-effect. (In all other cases, it's a no-op and will be
+         * compiled out.) */
+        (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
+    }
+    /* Now follow it down a level. Guaranteed to succeed. */
+    return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
+}
+
+
+
+/**************************************************************************/
+/* Destructors for shadow tables:
+ * Unregister the shadow, decrement refcounts of any entries present in it,
+ * and release the memory.
+ *
+ * N.B. These destructors do not clear the contents of the shadows.
+ * This allows us to delay TLB shootdowns until the page is being reused.
+ * See shadow_alloc() and shadow_free() for how this is handled.
+ */
+
+#if GUEST_PAGING_LEVELS >= 4
+/* Tear down an l4 shadow: unhash it, demote the guest page, drop the
+ * references held by its present entries, and free the page.  The
+ * contents are deliberately not cleared (see the comment above). */
+void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l4e_t *sl4e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl4mfn;
+    int xen_mappings;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l4_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+
+    /* Decrement refcounts of all the old entries */
+    xen_mappings = (!shadow_mode_external(v->domain));
+    sl4mfn = smfn;
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
+        if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
+        {
+            sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
+                       (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
+                       | ((unsigned long)sl4e & ~PAGE_MASK));
+        }
+    });
+
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+#endif
+
+#if GUEST_PAGING_LEVELS >= 3
+/* Tear down an l3 shadow: unregister it, drop the references it holds
+ * on its present l2 shadows, and return the page to the shadow pool. */
+void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
+{
+ shadow_l3e_t *sl3e;
+ u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+ mfn_t gmfn, sl3mfn;
+
+ SHADOW_DEBUG(DESTROY_SHADOW,
+ "%s(%05lx)\n", __func__, mfn_x(smfn));
+ ASSERT(t == PGC_SH_l3_shadow);
+
+ /* Record that the guest page isn't shadowed any more (in this type) */
+ gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+ delete_shadow_status(v, gmfn, t, smfn);
+ shadow_demote(v, gmfn, t);
+#if GUEST_PAGING_LEVELS == 3
+ /* Take this shadow off the list of root shadows */
+ /* (For PAE guests the l3 is the top level, hence a root shadow.) */
+ list_del_init(&mfn_to_page(smfn)->list);
+#endif
+
+ /* Decrement refcounts of all the old entries */
+ sl3mfn = smfn;
+ SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
+ if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
+ sh_put_ref(v, shadow_l3e_get_mfn(*sl3e),
+ (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
+ | ((unsigned long)sl3e & ~PAGE_MASK));
+ });
+
+ /* Put the memory back in the pool */
+ shadow_free(v->domain, smfn);
+}
+#endif
+
+
+#if GUEST_PAGING_LEVELS == 3
+static void sh_destroy_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e)
+/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
+{
+ int i;
+ /* Subshadows are 32-byte (4-entry) aligned groups within the page */
+ ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0);
+ for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ )
+ if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT )
+ /* NOTE(review): the back-pointer here is the machine address
+ * of the *subshadow base*, not of entry i — presumably
+ * sh_put_ref only needs subshadow granularity; confirm. */
+ sh_put_ref(v, shadow_l3e_get_mfn(sl3e[i]),
+ maddr_from_mapped_domain_page(sl3e));
+}
+#endif
+
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
+/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
+{
+ int i, j;
+ struct pae_l3_bookkeeping *bk;
+
+ ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask)
+ == PGC_SH_l3_pae_shadow);
+ /* The subshadows are split, 64 on each page of the shadow */
+ for ( i = 0; i < 2; i++ )
+ {
+ void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
+ for ( j = 0; j < 64; j++ )
+ {
+ /* Every second 32-byte region is a bookkeeping entry */
+ /* (Layout: 32 bytes of l3es followed by 32 bytes of
+ * bookkeeping, i.e. a 64-byte stride per subshadow.) */
+ bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
+ if ( bk->pinned )
+ sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
+ /* Check whether we've just freed the whole shadow */
+ if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 )
+ {
+ sh_unmap_domain_page(p);
+ return;
+ }
+ }
+ sh_unmap_domain_page(p);
+ }
+}
+#endif
+
+/* Tear down an l2 shadow (including a PAE "high" l2): unregister it,
+ * drop the references it holds on its present l1 shadows, and return
+ * the page to the shadow pool. */
+void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
+{
+ shadow_l2e_t *sl2e;
+ u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+ mfn_t gmfn, sl2mfn;
+ int xen_mappings;
+
+ SHADOW_DEBUG(DESTROY_SHADOW,
+ "%s(%05lx)\n", __func__, mfn_x(smfn));
+ ASSERT(t == PGC_SH_l2_shadow
+ || t == PGC_SH_l2h_pae_shadow);
+
+ /* Record that the guest page isn't shadowed any more (in this type) */
+ gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+ delete_shadow_status(v, gmfn, t, smfn);
+ shadow_demote(v, gmfn, t);
+#if GUEST_PAGING_LEVELS == 2
+ /* Take this shadow off the list of root shadows */
+ /* (For 2-level guests the l2 is the top level, hence a root.) */
+ list_del_init(&mfn_to_page(smfn)->list);
+#endif
+
+ /* Decrement refcounts of all the old entries */
+ /* Xen mappings live in the top-level l2 of 2-level guests, or in
+ * the l2h of PAE guests; skip them unless the domain is external. */
+ sl2mfn = smfn;
+ xen_mappings = (!shadow_mode_external(v->domain) &&
+ ((GUEST_PAGING_LEVELS == 2) ||
+ ((GUEST_PAGING_LEVELS == 3) &&
+ (t == PGC_SH_l2h_pae_shadow))));
+ SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+ if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
+ sh_put_ref(v, shadow_l2e_get_mfn(*sl2e),
+ (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
+ | ((unsigned long)sl2e & ~PAGE_MASK));
+ });
+
+ /* Put the memory back in the pool */
+ shadow_free(v->domain, smfn);
+}
+
+/* Tear down an l1 shadow. fl1 shadows (splintered superpages) are
+ * keyed by guest frame number in the hash, not by a backing mfn, so
+ * they take a different unregistration path and are never demoted. */
+void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
+{
+ struct domain *d = v->domain;
+ shadow_l1e_t *sl1e;
+ u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+
+ SHADOW_DEBUG(DESTROY_SHADOW,
+ "%s(%05lx)\n", __func__, mfn_x(smfn));
+ ASSERT(t == PGC_SH_l1_shadow || t == PGC_SH_fl1_shadow);
+
+ /* Record that the guest page isn't shadowed any more (in this type) */
+ if ( t == PGC_SH_fl1_shadow )
+ {
+ gfn_t gfn = _gfn(mfn_to_page(smfn)->u.inuse.type_info);
+ delete_fl1_shadow_status(v, gfn, smfn);
+ }
+ else
+ {
+ mfn_t gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+ delete_shadow_status(v, gmfn, t, smfn);
+ shadow_demote(v, gmfn, t);
+ }
+
+ /* Only refcounting modes take references on the frames that l1
+ * entries point at, so only they need to release them here. */
+ if ( shadow_mode_refcounts(d) )
+ {
+ /* Decrement refcounts of all the old entries */
+ mfn_t sl1mfn = smfn;
+ SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
+ if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT )
+ shadow_put_page_from_l1e(*sl1e, d);
+ });
+ }
+
+ /* Put the memory back in the pool */
+ shadow_free(v->domain, smfn);
+}
+
+#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
+/* Free an HVM vcpu's monitor table, including any extra monitor
+ * page that was hooked below it to extend lower-level shadows. */
+void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+ struct domain *d = v->domain;
+ ASSERT((mfn_to_page(mmfn)->count_info & PGC_SH_type_mask)
+ == PGC_SH_monitor_table);
+
+#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
+ /* Need to destroy the l3 monitor page in slot 0 too */
+ {
+ l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
+ ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+ shadow_free(d, _mfn(l4e_get_pfn(l4e[0])));
+ sh_unmap_domain_page(l4e);
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ /* Need to destroy the l2 monitor page in slot 3 too */
+ {
+ l3_pgentry_t *l3e = sh_map_domain_page(mmfn);
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
+ sh_unmap_domain_page(l3e);
+ }
+#endif
+
+ /* Put the memory back in the pool */
+ shadow_free(d, mmfn);
+}
+#endif
+
+/**************************************************************************/
+/* Functions to destroy non-Xen mappings in a pagetable hierarchy.
+ * These are called from common code when we are running out of shadow
+ * memory, and unpinning all the top-level shadows hasn't worked.
+ *
+ * This implementation is pretty crude and slow, but we hope that it won't
+ * be called very often. */
+
+#if GUEST_PAGING_LEVELS == 2
+
+/* Drop every guest mapping from a 32b guest's top-level (l2) shadow
+ * by writing empty l2es; Xen's own slots are skipped via the
+ * xen_mappings flag for non-external domains. */
+void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
+{
+ shadow_l2e_t *sl2e;
+ int xen_mappings = !shadow_mode_external(v->domain);
+ SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+ (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+ });
+}
+
+#elif GUEST_PAGING_LEVELS == 3
+
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
+/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
+{
+ shadow_l3e_t *sl3e;
+ SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
+ if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
+ mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
+ if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask)
+ == PGC_SH_l2h_pae_shadow )
+ {
+ /* High l2: need to pick particular l2es to unhook */
+ /* (It also holds Xen mappings, which must survive.) */
+ shadow_l2e_t *sl2e;
+ SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
+ (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+ });
+ }
+ else
+ {
+ /* Normal l2: can safely unhook the whole l3e */
+ (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
+ }
+ }
+ });
+ /* We've changed PAE L3 entries: must sync up various copies of them */
+ sh_pae_recopy(v->domain);
+}
+
+#elif GUEST_PAGING_LEVELS == 4
+
+/* Drop every guest mapping from a 64b guest's top-level (l4) shadow. */
+void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
+{
+ shadow_l4e_t *sl4e;
+ int xen_mappings = !shadow_mode_external(v->domain);
+ SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
+ (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+ });
+}
+
+#endif
+
+/**************************************************************************/
+/* Internal translation functions.
+ * These functions require a pointer to the shadow entry that will be updated.
+ */
+
+/* These functions take a new guest entry, translate it to shadow and write
+ * the shadow entry.
+ *
+ * They return the same bitmaps as the shadow_set_lXe() functions.
+ */
+
+#if GUEST_PAGING_LEVELS >= 4
+/* The guest has written a new l4e; propagate it into the shadow l4
+ * entry *se in shadow page sl4mfn. Returns a SHADOW_SET_* bitmap;
+ * SHADOW_SET_ERROR is set if the guest l3 gfn has no valid mfn. */
+static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
+{
+ shadow_l4e_t new_sl4e;
+ guest_l4e_t *new_gl4e = new_ge;
+ shadow_l4e_t *sl4p = se;
+ mfn_t sl3mfn = _mfn(INVALID_MFN);
+ int result = 0;
+
+ perfc_incrc(shadow_validate_gl4e_calls);
+
+ if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
+ {
+ gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
+ mfn_t gl3mfn = vcpu_gfn_to_mfn(v, gl3gfn);
+ if ( valid_mfn(gl3mfn) )
+ sl3mfn = get_shadow_status(v, gl3mfn, PGC_SH_l3_shadow);
+ else
+ result |= SHADOW_SET_ERROR;
+ }
+ l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
+ sl3mfn, &new_sl4e, ft_prefetch);
+ result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
+ return result;
+}
+#endif // GUEST_PAGING_LEVELS >= 4
+
+#if GUEST_PAGING_LEVELS >= 3
+/* The guest has written a new l3e; propagate it into the shadow l3
+ * entry *se in shadow page sl3mfn. Returns a SHADOW_SET_* bitmap. */
+static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
+{
+ shadow_l3e_t new_sl3e;
+ guest_l3e_t *new_gl3e = new_ge;
+ shadow_l3e_t *sl3p = se;
+ mfn_t sl2mfn = _mfn(INVALID_MFN);
+ int result = 0;
+
+ perfc_incrc(shadow_validate_gl3e_calls);
+
+ if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
+ {
+ gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
+ mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
+ if ( valid_mfn(gl2mfn) )
+ sl2mfn = get_shadow_status(v, gl2mfn, PGC_SH_l2_shadow);
+ else
+ result |= SHADOW_SET_ERROR;
+ }
+ l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN),
+ sl2mfn, &new_sl3e, ft_prefetch);
+ result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
+
+#if GUEST_PAGING_LEVELS == 3
+ /* We have changed a PAE l3 entry: need to sync up the possible copies
+ * of it */
+ if ( result & SHADOW_SET_L3PAE_RECOPY )
+ sh_pae_recopy(v->domain);
+#endif
+
+ return result;
+}
+#endif // GUEST_PAGING_LEVELS >= 3
+
+/* The guest has written a new l2e; propagate it into the shadow l2
+ * entry *se in shadow page sl2mfn. Superpage l2es are looked up via
+ * the fl1 (splintered superpage) shadow hash. Returns SHADOW_SET_*. */
+static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
+{
+ shadow_l2e_t new_sl2e;
+ guest_l2e_t *new_gl2e = new_ge;
+ shadow_l2e_t *sl2p = se;
+ mfn_t sl1mfn = _mfn(INVALID_MFN);
+ int result = 0;
+
+ perfc_incrc(shadow_validate_gl2e_calls);
+
+ if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
+ {
+ gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
+ if ( guest_supports_superpages(v) &&
+ (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
+ {
+ // superpage -- need to look up the shadow L1 which holds the
+ // splitters...
+ sl1mfn = get_fl1_shadow_status(v, gl1gfn);
+#if 0
+ // XXX - it's possible that we want to do some kind of prefetch
+ // for superpage fl1's here, but this is *not* on the demand path,
+ // so we'll hold off trying that for now...
+ //
+ if ( !valid_mfn(sl1mfn) )
+ sl1mfn = make_fl1_shadow(v, gl1gfn);
+#endif
+ }
+ else
+ {
+ mfn_t gl1mfn = vcpu_gfn_to_mfn(v, gl1gfn);
+ if ( valid_mfn(gl1mfn) )
+ sl1mfn = get_shadow_status(v, gl1mfn, PGC_SH_l1_shadow);
+ else
+ result |= SHADOW_SET_ERROR;
+ }
+ }
+ l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
+ sl1mfn, &new_sl2e, ft_prefetch);
+ result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
+
+ return result;
+}
+
+/* The guest has written a new l1e; propagate it into the shadow l1
+ * entry *se in shadow page sl1mfn. An unmapped gfn is treated as an
+ * mmio entry. Returns a SHADOW_SET_* bitmap. */
+static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
+{
+ shadow_l1e_t new_sl1e;
+ guest_l1e_t *new_gl1e = new_ge;
+ shadow_l1e_t *sl1p = se;
+ gfn_t gfn;
+ mfn_t mfn;
+ int result = 0;
+
+ perfc_incrc(shadow_validate_gl1e_calls);
+
+ gfn = guest_l1e_get_gfn(*new_gl1e);
+ mfn = vcpu_gfn_to_mfn(v, gfn);
+
+ l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e,
+ /* mmio? */ !valid_mfn(mfn));
+
+ result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
+ return result;
+}
+
+
+/**************************************************************************/
+/* Functions which translate and install a the shadows of arbitrary guest
+ * entries that we have just seen the guest write. */
+
+
+static inline int
+sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
+ void *new_gp, u32 size, u32 sh_type,
+ u32 (*shadow_index)(mfn_t *smfn, u32 idx),
+ int (*validate_ge)(struct vcpu *v, void *ge,
+ mfn_t smfn, void *se))
+/* Generic function for mapping and validating. */
+/* For each guest entry in [new_gp, new_gp+size), map the matching
+ * shadow page and call validate_ge on the entry pair. shadow_index
+ * may redirect to the second page of an 8k shadow (and adjusts *smfn
+ * accordingly). Returns the OR of all SHADOW_SET_* results. */
+{
+ mfn_t smfn, smfn2, map_mfn;
+ shadow_l1e_t *sl1p;
+ u32 shadow_idx, guest_idx;
+ int result = 0;
+
+ /* Align address and size to guest entry boundaries */
+ size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
+ new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
+ size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
+ ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
+
+ /* Map the shadow page */
+ smfn = get_shadow_status(v, gmfn, sh_type);
+ ASSERT(valid_mfn(smfn)); /* Otherwise we would not have been called */
+ guest_idx = guest_index(new_gp);
+ map_mfn = smfn;
+ shadow_idx = shadow_index(&map_mfn, guest_idx);
+ sl1p = map_shadow_page(map_mfn);
+
+ /* Validate one entry at a time */
+ while ( size )
+ {
+ smfn2 = smfn;
+ guest_idx = guest_index(new_gp);
+ shadow_idx = shadow_index(&smfn2, guest_idx);
+ if ( mfn_x(smfn2) != mfn_x(map_mfn) )
+ {
+ /* We have moved to another page of the shadow */
+ map_mfn = smfn2;
+ unmap_shadow_page(sl1p);
+ sl1p = map_shadow_page(map_mfn);
+ }
+ result |= validate_ge(v,
+ new_gp,
+ map_mfn,
+ &sl1p[shadow_idx]);
+ size -= sizeof(guest_l1e_t);
+ new_gp += sizeof(guest_l1e_t);
+ }
+ unmap_shadow_page(sl1p);
+ return result;
+}
+
+
+/* Validate a batch of guest l4e writes against the l4 shadow.
+ * BUG()s if this translation unit was built without 4-level guests. */
+int
+sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
+ void *new_gl4p, u32 size)
+{
+#if GUEST_PAGING_LEVELS >= 4
+ return sh_map_and_validate(v, gl4mfn, new_gl4p, size,
+ PGC_SH_l4_shadow,
+ shadow_l4_index,
+ validate_gl4e);
+#else // ! GUEST_PAGING_LEVELS >= 4
+ SHADOW_PRINTK("called in wrong paging mode!\n");
+ BUG();
+ return 0;
+#endif
+}
+
+/* Validate a batch of guest l3e writes against the l3 shadow.
+ * BUG()s if this translation unit was built without 3-level guests. */
+int
+sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
+ void *new_gl3p, u32 size)
+{
+#if GUEST_PAGING_LEVELS >= 3
+ return sh_map_and_validate(v, gl3mfn, new_gl3p, size,
+ PGC_SH_l3_shadow,
+ shadow_l3_index,
+ validate_gl3e);
+#else // ! GUEST_PAGING_LEVELS >= 3
+ SHADOW_PRINTK("called in wrong paging mode!\n");
+ BUG();
+ return 0;
+#endif
+}
+
+/* Validate a batch of guest l2e writes against the l2 shadow. */
+int
+sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
+ void *new_gl2p, u32 size)
+{
+ return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
+ PGC_SH_l2_shadow,
+ shadow_l2_index,
+ validate_gl2e);
+}
+
+/* Validate a batch of guest l2e writes against a PAE "high" l2 shadow
+ * (the l2 covering the top of the address space, which also carries
+ * Xen mappings). Only meaningful for 3-level guests. */
+int
+sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
+ void *new_gl2p, u32 size)
+{
+#if GUEST_PAGING_LEVELS == 3
+ return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
+ PGC_SH_l2h_shadow,
+ shadow_l2_index,
+ validate_gl2e);
+#else /* Non-PAE guests don't have different kinds of l2 table */
+ SHADOW_PRINTK("called in wrong paging mode!\n");
+ BUG();
+ return 0;
+#endif
+}
+
+/* Validate a batch of guest l1e writes against the l1 shadow. */
+int
+sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
+ void *new_gl1p, u32 size)
+{
+ return sh_map_and_validate(v, gl1mfn, new_gl1p, size,
+ PGC_SH_l1_shadow,
+ shadow_l1_index,
+ validate_gl1e);
+}
+
+
+/**************************************************************************/
+/* Optimization: If we see two emulated writes of zeros to the same
+ * page-table without another kind of page fault in between, we guess
+ * that this is a batch of changes (for process destruction) and
+ * unshadow the page so we don't take a pagefault on every entry. This
+ * should also make finding writeable mappings of pagetables much
+ * easier. */
+
+/* Look to see if this is the second emulated write in a row to this
+ * page, and unshadow/unhook if it is */
+static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+ /* Second consecutive emulated write to the same shadowed page? */
+ if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) &&
+ sh_mfn_is_a_page_table(gmfn) )
+ {
+ u32 flags = mfn_to_page(gmfn)->shadow_flags;
+ mfn_t smfn;
+ if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
+ {
+ /* Not a top-level shadow: try to drop the shadow outright */
+ perfc_incrc(shadow_early_unshadow);
+ sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
+ return;
+ }
+ /* SHF_unhooked_mappings is set to make sure we only unhook
+ * once in a single batch of updates. It is reset when this
+ * top-level page is loaded into CR3 again */
+ if ( !(flags & SHF_unhooked_mappings) )
+ {
+ perfc_incrc(shadow_early_unshadow_top);
+ mfn_to_page(gmfn)->shadow_flags |= SHF_unhooked_mappings;
+ if ( flags & SHF_L2_32 )
+ {
+ smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
+ shadow_unhook_mappings(v, smfn);
+ }
+ if ( flags & SHF_L3_PAE )
+ {
+ smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
+ shadow_unhook_mappings(v, smfn);
+ }
+ if ( flags & SHF_L4_64 )
+ {
+ smfn = get_shadow_status(v, gmfn, PGC_SH_l4_64_shadow);
+ shadow_unhook_mappings(v, smfn);
+ }
+ }
+ }
+ /* Remember this mfn so a repeat write can trigger the heuristic */
+ v->arch.shadow.last_emulated_mfn = mfn_x(gmfn);
+#endif
+}
+
+/* Stop counting towards early unshadows, as we've seen a real page fault */
+static inline void reset_early_unshadow(struct vcpu *v)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+ /* INVALID_MFN never matches a real emulated-write mfn */
+ v->arch.shadow.last_emulated_mfn = INVALID_MFN;
+#endif
+}
+
+
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults. Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+
+static int sh_page_fault(struct vcpu *v,
+ unsigned long va,
+ struct cpu_user_regs *regs)
+{
+ struct domain *d = v->domain;
+ walk_t gw;
+ u32 accumulated_gflags;
+ gfn_t gfn;
+ mfn_t gmfn, sl1mfn=_mfn(0);
+ shadow_l1e_t sl1e, *ptr_sl1e;
+ paddr_t gpa;
+ struct cpu_user_regs emul_regs;
+ struct x86_emulate_ctxt emul_ctxt;
+ int r, mmio;
+ fetch_type_t ft = 0;
+
+ //
+ // XXX: Need to think about eventually mapping superpages directly in the
+ // shadow (when possible), as opposed to splintering them into a
+ // bunch of 4K maps.
+ //
+
+ SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
+ v->domain->domain_id, v->vcpu_id, va, regs->error_code);
+
+ /* All shadow state for this domain is changed under the shadow lock */
+ shadow_lock(d);
+
+ shadow_audit_tables(v);
+
+ if ( guest_walk_tables(v, va, &gw, 1) != 0 )
+ {
+ SHADOW_PRINTK("malformed guest pagetable!");
+ print_gw(&gw);
+ }
+
+ sh_audit_gw(v, &gw);
+
+ // We do not look at the gw->l1e, as that will not exist for superpages.
+ // Instead, we use the gw->eff_l1e...
+ //
+ // We need not check all the levels of the guest page table entries for
+ // present vs not-present, as the eff_l1e will always be not present if
+ // one of the higher level entries is not present.
+ //
+ if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
+ {
+ if ( hvm_guest(v) && !shadow_vcpu_mode_translate(v) )
+ {
+ /* Not present in p2m map, means this is mmio */
+ /* (With translation off the fault address is used directly
+ * as the physical address for the mmio handler.) */
+ gpa = va;
+ goto mmio;
+ }
+
+ perfc_incrc(shadow_fault_bail_not_present);
+ goto not_a_shadow_fault;
+ }
+
+ // All levels of the guest page table are now known to be present.
+ accumulated_gflags = accumulate_guest_flags(&gw);
+
+ // Check for attempts to access supervisor-only pages from user mode,
+ // i.e. ring 3. Such errors are not caused or dealt with by the shadow
+ // code.
+ //
+ if ( (regs->error_code & PFEC_user_mode) &&
+ !(accumulated_gflags & _PAGE_USER) )
+ {
+ /* illegal user-mode access to supervisor-only page */
+ perfc_incrc(shadow_fault_bail_user_supervisor);
+ goto not_a_shadow_fault;
+ }
+
+ // Was it a write fault?
+ //
+ if ( regs->error_code & PFEC_write_access )
+ {
+ if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
+ {
+ perfc_incrc(shadow_fault_bail_ro_mapping);
+ goto not_a_shadow_fault;
+ }
+ }
+ else // must have been either an insn fetch or read fault
+ {
+ // Check for NX bit violations: attempts to execute code that is
+ // marked "do not execute". Such errors are not caused or dealt with
+ // by the shadow code.
+ //
+ if ( regs->error_code & PFEC_insn_fetch )
+ {
+ if ( accumulated_gflags & _PAGE_NX_BIT )
+ {
+ /* NX prevented this code fetch */
+ perfc_incrc(shadow_fault_bail_nx);
+ goto not_a_shadow_fault;
+ }
+ }
+ }
+
+ /* Is this an MMIO access? */
+ gfn = guest_l1e_get_gfn(gw.eff_l1e);
+ mmio = ( hvm_guest(v)
+ && shadow_vcpu_mode_translate(v)
+ && mmio_space(gfn_to_paddr(gfn)) );
+
+ /* For MMIO, the shadow holds the *gfn*; for normal accesses, if holds
+ * the equivalent mfn. */
+ if ( mmio )
+ gmfn = _mfn(gfn_x(gfn));
+ else
+ {
+ gmfn = vcpu_gfn_to_mfn(v, gfn);
+ if ( !valid_mfn(gmfn) )
+ {
+ perfc_incrc(shadow_fault_bail_bad_gfn);
+ SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n",
+ gfn_x(gfn), mfn_x(gmfn));
+ goto not_a_shadow_fault;
+ }
+ }
+
+ /* Make sure there is enough free shadow memory to build a chain of
+ * shadow tables: one SHADOW_MAX_ORDER chunk will always be enough
+ * to allocate all we need. (We never allocate a top-level shadow
+ * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
+ shadow_prealloc(d, SHADOW_MAX_ORDER);
+
+ /* Acquire the shadow. This must happen before we figure out the rights
+ * for the shadow entry, since we might promote a page here. */
+ // XXX -- this code will need to change somewhat if/when the shadow code
+ // can directly map superpages...
+ ft = ((regs->error_code & PFEC_write_access) ?
+ ft_demand_write : ft_demand_read);
+ ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
+ ASSERT(ptr_sl1e);
+
+ /* Calculate the shadow entry */
+ if ( ft == ft_demand_write )
+ {
+ if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) )
+ {
+ perfc_incrc(shadow_fault_emulate_write);
+ goto emulate;
+ }
+ }
+ else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) )
+ {
+ perfc_incrc(shadow_fault_emulate_read);
+ goto emulate;
+ }
+
+ /* Quick sanity check: we never make an MMIO entry that's got the
+ * _PAGE_PRESENT flag set in it. */
+ ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT));
+
+ r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+
+ if ( mmio )
+ {
+ gpa = guest_walk_to_gpa(&gw);
+ goto mmio;
+ }
+
+#if 0
+ if ( !(r & SHADOW_SET_CHANGED) )
+ debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH_PRI_pte
+ ") did not change anything\n",
+ __func__, gw.va, l1e_get_intpte(sl1e));
+#endif
+
+ perfc_incrc(shadow_fault_fixed);
+ d->arch.shadow.fault_count++;
+ reset_early_unshadow(v);
+
+ done:
+ sh_audit_gw(v, &gw);
+ unmap_walk(v, &gw);
+ SHADOW_PRINTK("fixed\n");
+ shadow_audit_tables(v);
+ shadow_unlock(d);
+ return EXCRET_fault_fixed;
+
+ emulate:
+
+ /* Take the register set we were called with */
+ emul_regs = *regs;
+ if ( hvm_guest(v) )
+ {
+ /* Add the guest's segment selectors, rip, rsp. rflags */
+ hvm_store_cpu_guest_regs(v, &emul_regs, NULL);
+ }
+ emul_ctxt.regs = &emul_regs;
+ emul_ctxt.cr2 = va;
+ emul_ctxt.mode = hvm_guest(v) ? hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST;
+
+ SHADOW_PRINTK("emulate: eip=%#lx\n", emul_regs.eip);
+
+ v->arch.shadow.propagate_fault = 0;
+ if ( x86_emulate_memop(&emul_ctxt, &shadow_emulator_ops) )
+ {
+ SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n",
+ mfn_x(gmfn));
+ perfc_incrc(shadow_fault_emulate_failed);
+ /* If this is actually a page table, then we have a bug, and need
+ * to support more operations in the emulator. More likely,
+ * though, this is a hint that this page should not be shadowed. */
+ shadow_remove_all_shadows(v, gmfn);
+ /* This means that actual missing operations will cause the
+ * guest to loop on the same page fault. */
+ goto done;
+ }
+ if ( v->arch.shadow.propagate_fault )
+ {
+ /* Emulation triggered another page fault */
+ goto not_a_shadow_fault;
+ }
+
+ /* Emulator has changed the user registers: write back */
+ if ( hvm_guest(v) )
+ {
+ /* Write back the guest's segment selectors, rip, rsp. rflags */
+ hvm_load_cpu_guest_regs(v, &emul_regs);
+ /* And don't overwrite those in the caller's regs. */
+ emul_regs.eip = regs->eip;
+ emul_regs.cs = regs->cs;
+ emul_regs.eflags = regs->eflags;
+ emul_regs.esp = regs->esp;
+ emul_regs.ss = regs->ss;
+ emul_regs.es = regs->es;
+ emul_regs.ds = regs->ds;
+ emul_regs.fs = regs->fs;
+ emul_regs.gs = regs->gs;
+ }
+ *regs = emul_regs;
+
+ goto done;
+
+ mmio:
+ perfc_incrc(shadow_fault_mmio);
+ /* NOTE(review): only the lower bound of the APIC range is checked
+ * here — presumably an upper bound is unnecessary because higher
+ * addresses are also mmio; confirm. */
+ if ( !hvm_apic_support(d) && (gpa >= 0xFEC00000) )
+ {
+ /* Need to deal with these disabled-APIC accesses, as
+ * handle_mmio() apparently does not currently do that. */
+ /* TJD: What about it, then? For now, I'm turning this BUG()
+ * into a domain_crash() since we don't want to kill Xen. */
+ SHADOW_ERROR("disabled-APIC access: not supported\n.");
+ domain_crash(d);
+ }
+ sh_audit_gw(v, &gw);
+ unmap_walk(v, &gw);
+ SHADOW_PRINTK("mmio\n");
+ shadow_audit_tables(v);
+ reset_early_unshadow(v);
+ shadow_unlock(d);
+ sh_log_mmio(v, gpa);
+ handle_mmio(va, gpa);
+ return EXCRET_fault_fixed;
+
+ not_a_shadow_fault:
+ sh_audit_gw(v, &gw);
+ unmap_walk(v, &gw);
+ SHADOW_PRINTK("not a shadow fault\n");
+ shadow_audit_tables(v);
+ reset_early_unshadow(v);
+ shadow_unlock(d);
+ return 0;
+}
+
+
+static int
+sh_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg. Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+ shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
+
+ // XXX -- might be a good thing to prefetch the va into the shadow
+
+ // no need to flush anything if there's no SL2...
+ //
+ if ( !ptr_sl2e )
+ return 0;
+
+ // If there's nothing shadowed for this particular sl2e, then
+ // there is no need to do an invlpg, either...
+ //
+ if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
+ return 0;
+
+ // Check to see if the SL2 is a splintered superpage...
+ // If so, then we'll need to flush the entire TLB (because that's
+ // easier than invalidating all of the individual 4K pages).
+ //
+ /* (fl1 shadows are the l1s used to splinter guest superpages) */
+ if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
+ PGC_SH_type_mask) == PGC_SH_fl1_shadow )
+ {
+ local_flush_tlb();
+ return 0;
+ }
+
+ return 1;
+}
+
+static unsigned long
+sh_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+/* Returns INVALID_GFN when the guest tables do not map va (callers
+ * such as sh_gva_to_gpa test for that value). */
+{
+ walk_t gw;
+ gfn_t gfn;
+
+ guest_walk_tables(v, va, &gw, 0);
+ gfn = guest_walk_to_gfn(&gw);
+ unmap_walk(v, &gw);
+
+ return gfn_x(gfn);
+}
+
+
+static unsigned long
+sh_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+/* Returns the guest physical address, or 0 if va is not mapped. */
+{
+ unsigned long gfn = sh_gva_to_gfn(v, va);
+ if ( gfn == INVALID_GFN )
+ return 0;
+ else
+ return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
+}
+
+
+// XXX -- should this be in this file?
+// Or should it be moved to shadow-common.c?
+//
+/* returns a lowmem machine address of the copied HVM L3 root table
+ * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
+ * otherwise blank out any entries with reserved bits in them. */
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+static unsigned long
+hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
+{
+ int i, f;
+ /* The flag bits that are reserved in hardware PAE l3 entries */
+ int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
+ l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+ memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
+ for ( i = 0; i < 4; i++ )
+ {
+ f = l3e_get_flags(l3tab[i]);
+ if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
+ new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
+ else
+ new_l3e = l3e_empty();
+ /* safe_write_entry avoids a torn 64-bit pagetable write */
+ safe_write_entry(&copy[i], &new_l3e);
+ }
+ return __pa(copy);
+}
+#endif
+
+
+static inline void
+sh_update_linear_entries(struct vcpu *v)
+/* Sync up all the linear mappings for this vcpu's pagetables */
+{
+ struct domain *d = v->domain;
+
+ /* Linear pagetables in PV guests
+ * ------------------------------
+ *
+ * Guest linear pagetables, which map the guest pages, are at
+ * LINEAR_PT_VIRT_START. Shadow linear pagetables, which map the
+ * shadows, are at SH_LINEAR_PT_VIRT_START. Most of the time these
+ * are set up at shadow creation time, but (of course!) the PAE case
+ * is subtler. Normal linear mappings are made by having an entry
+ * in the top-level table that points to itself (shadow linear) or
+ * to the guest top-level table (guest linear). For PAE, to set up
+ * a linear map requires us to copy the four top-level entries into
+ * level-2 entries. That means that every time we change a PAE l3e,
+ * we need to reflect the change into the copy.
+ *
+ * Linear pagetables in HVM guests
+ * -------------------------------
+ *
+ * For HVM guests, the linear pagetables are installed in the monitor
+ * tables (since we can't put them in the shadow). Shadow linear
+ * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
+ * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for
+ * a linear pagetable of the monitor tables themselves. We have
+ * the same issue of having to re-copy PAE l3 entries whevever we use
+ * PAE shadows.
+ *
+ * Because HVM guests run on the same monitor tables regardless of the
+ * shadow tables in use, the linear mapping of the shadow tables has to
+ * be updated every time v->arch.shadow_table changes.
+ */
+
+ /* Don't try to update the monitor table if it doesn't exist */
+ if ( shadow_mode_external(d)
+ && pagetable_get_pfn(v->arch.monitor_table) == 0 )
+ return;
+
+#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 4)
+
+ /* For PV, one l4e points at the guest l4, one points at the shadow
+ * l4. No maintenance required.
+ * For HVM, just need to update the l4e that points to the shadow l4. */
+
+ if ( shadow_mode_external(d) )
+ {
+ /* Use the linear map if we can; otherwise make a new mapping */
+ if ( v == current )
+ {
+ __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] =
+ l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ __PAGE_HYPERVISOR);
+ }
+ else
+ {
+ l4_pgentry_t *ml4e;
+ ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+ ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+ l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ __PAGE_HYPERVISOR);
+ sh_unmap_domain_page(ml4e);
+ }
+ }
+
+#elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
+
+ /* This case only exists in HVM. To give ourselves a linear map of the
+ * shadows, we need to extend a PAE shadow to 4 levels. We do this by
+ * having a monitor l3 in slot 0 of the monitor l4 table, and
+ * copying the PAE l3 entries into it. Then, by having the monitor l4e
+ * for shadow pagetables also point to the monitor l4, we can use it
+ * to access the shadows. */
+
+ if ( shadow_mode_external(d) )
+ {
+ /* Install copies of the shadow l3es into the monitor l3 table.
+ * The monitor l3 table is hooked into slot 0 of the monitor
+ * l4 table, so we use l3 linear indices 0 to 3 */
+ shadow_l3e_t *sl3e;
+ l3_pgentry_t *ml3e;
+ mfn_t l3mfn;
+ int i;
+
+ /* Use linear mappings if we can; otherwise make new mappings */
+ if ( v == current )
+ {
+ ml3e = __linear_l3_table;
+ l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
+#if GUEST_PAGING_LEVELS == 2
+ /* Shadow l3 tables are made up by update_cr3 */
+ sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+#else
+ sl3e = v->arch.shadow_vtable;
+#endif
+ }
+ else
+ {
+ l4_pgentry_t *ml4e;
+ ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+ ASSERT(l4e_get_flags(ml4e[0]) & _PAGE_PRESENT);
+ l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
+ ml3e = sh_map_domain_page(l3mfn);
+ sh_unmap_domain_page(ml4e);
+#if GUEST_PAGING_LEVELS == 2
+ /* Shadow l3 tables are made up by update_cr3 */
+ sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+#else
+ sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
+#endif
+ }
+
+ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+ {
+ ml3e[i] =
+ (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT)
+ ? l3e_from_pfn(mfn_x(shadow_l3e_get_mfn(sl3e[i])),
+ __PAGE_HYPERVISOR)
+ : l3e_empty();
+ }
+
+ if ( v != current )
+ {
+ sh_unmap_domain_page(ml3e);
+#if GUEST_PAGING_LEVELS != 2
+ sh_unmap_domain_page(sl3e);
+#endif
+ }
+ }
+
+#elif CONFIG_PAGING_LEVELS == 3
+
+ /* PV: need to copy the guest's l3 entries into the guest-linear-map l2
+ * entries in the shadow, and the shadow's l3 entries into the
+ * shadow-linear-map l2 entries in the shadow. This is safe to do
+ * because Xen does not let guests share high-slot l2 tables between l3s,
+ * so we know we're not treading on anyone's toes.
+ *
+ * HVM: need to copy the shadow's l3 entries into the
+ * shadow-linear-map l2 entries in the monitor table. This is safe
+ * because we have one monitor table for each vcpu. The monitor's
+ * own l3es don't need to be copied because they never change.
+ * XXX That might change if we start stuffing things into the rest
+ * of the monitor's virtual address space.
+ */
+ {
+ l2_pgentry_t *l2e, new_l2e;
+ shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
+ int i;
+
+#if GUEST_PAGING_LEVELS == 2
+ /* Shadow l3 tables were built by update_cr3 */
+ if ( shadow_mode_external(d) )
+ shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+ else
+ BUG(); /* PV 2-on-3 is not supported yet */
+
+#else /* GUEST_PAGING_LEVELS == 3 */
+
+ /* Use local vcpu's mappings if we can; otherwise make new mappings */
+ if ( v == current )
+ {
+ shadow_l3e = v->arch.shadow_vtable;
+ if ( !shadow_mode_external(d) )
+ guest_l3e = v->arch.guest_vtable;
+ }
+ else
+ {
+ mfn_t smfn;
+ int idx;
+
+ /* Map the shadow l3 */
+ smfn = pagetable_get_mfn(v->arch.shadow_table);
+ idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
+ shadow_l3e = sh_map_domain_page(smfn);
+ shadow_l3e += idx;
+ if ( !shadow_mode_external(d) )
+ {
+ /* Also the guest l3 */
+ mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table);
+ guest_l3e = sh_map_domain_page(gmfn);
+ guest_l3e += guest_index(v->arch.guest_vtable);
+ }
+ }
+#endif /* GUEST_PAGING_LEVELS */
+
+ /* Choose where to write the entries, using linear maps if possible */
+ if ( v == current && shadow_mode_external(d) )
+ {
+ /* From the monitor tables, it's safe to use linear maps to update
+ * monitor l2s */
+ l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
+ }
+ else if ( shadow_mode_external(d) )
+ {
+ /* Map the monitor table's high l2 */
+ l3_pgentry_t *l3e;
+ l3e = sh_map_domain_page(
+ pagetable_get_mfn(v->arch.monitor_table));
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
+ sh_unmap_domain_page(l3e);
+ }
+ else
+ {
+ /* Map the shadow table's high l2 */
+ ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
+ l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
+ }
+
+
+ if ( !shadow_mode_external(d) )
+ {
+ /* Write linear mapping of guest. */
+ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+ {
+ new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
+ __PAGE_HYPERVISOR)
+ : l2e_empty();
+ safe_write_entry(
+ &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
+ &new_l2e);
+ }
+ }
+
+ /* Write linear mapping of shadow. */
+ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+ {
+ new_l2e = (shadow_l3e_get_flags(shadow_l3e[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(shadow_l3e[i])),
+ __PAGE_HYPERVISOR)
+ : l2e_empty();
+ safe_write_entry(
+ &l2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i],
+ &new_l2e);
+ }
+
+ if ( v != current || !shadow_mode_external(d) )
+ sh_unmap_domain_page(l2e);
+
+#if GUEST_PAGING_LEVELS == 3
+ if ( v != current)
+ {
+ sh_unmap_domain_page(shadow_l3e);
+ if ( !shadow_mode_external(d) )
+ sh_unmap_domain_page(guest_l3e);
+ }
+#endif
+ }
+
+#elif CONFIG_PAGING_LEVELS == 2
+
+ /* For PV, one l2e points at the guest l2, one points at the shadow
+ * l2. No maintenance required.
+ * For HVM, just need to update the l2e that points to the shadow l2. */
+
+ if ( shadow_mode_external(d) )
+ {
+ /* Use the linear map if we can; otherwise make a new mapping */
+ if ( v == current )
+ {
+ __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ __PAGE_HYPERVISOR);
+ }
+ else
+ {
+ l2_pgentry_t *ml2e;
+ ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+ ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ __PAGE_HYPERVISOR);
+ sh_unmap_domain_page(ml2e);
+ }
+ }
+
+#else
+#error this should not happen
+#endif
+}
+
+
+// XXX -- should this be in this file?
+// Or should it be moved to shadow-common.c?
+//
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+void sh_pae_recopy(struct domain *d)
+/* Called whenever we write to the l3 entries of a PAE pagetable which
+ * is currently in use. Each vcpu that is using the table needs to
+ * resync its copies of the l3s in linear maps and any low-memory
+ * copies it might have made for fitting into 32bit CR3.
+ * Since linear maps are also resynced when we change CR3, we don't
+ * need to worry about changes to PAE l3es that are not currently in use.*/
+{
+    struct vcpu *v;
+    cpumask_t flush_mask = CPU_MASK_NONE;
+    ASSERT(shadow_lock_is_acquired(d));
+
+    for_each_vcpu(d, v)
+    {
+        /* Only vcpus that were flagged by the l3 write need resyncing */
+        if ( !v->arch.shadow.pae_flip_pending )
+            continue;
+
+        /* Remember this vcpu's pcpu so we can do one combined TLB
+         * flush after all copies have been refreshed */
+        cpu_set(v->processor, flush_mask);
+
+        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
+
+        /* This vcpu has a copy in its linear maps */
+        sh_update_linear_entries(v);
+        if ( hvm_guest(v) )
+        {
+            /* This vcpu has a copy in its HVM PAE l3 */
+            v->arch.hvm_vcpu.hw_cr3 =
+                hvm_pae_copy_root(v, v->arch.shadow_vtable,
+                                  !shadow_vcpu_mode_translate(v));
+        }
+#if CONFIG_PAGING_LEVELS == 3
+        else
+        {
+            /* This vcpu might have copied the l3 to below 4GB */
+            if ( v->arch.cr3 >> PAGE_SHIFT
+                 != pagetable_get_pfn(v->arch.shadow_table) )
+            {
+                /* Recopy to where that copy is. */
+                int i;
+                l3_pgentry_t *dst, *src;
+                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
+                src = v->arch.shadow_vtable;
+                for ( i = 0 ; i < 4 ; i++ )
+                    safe_write_entry(dst + i, src + i);
+            }
+        }
+#endif
+        v->arch.shadow.pae_flip_pending = 0;
+    }
+
+    /* A single flush covers every pcpu whose vcpu was resynced above */
+    flush_tlb_mask(flush_mask);
+}
+#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
+
+
+/* removes:
+ * vcpu->arch.guest_vtable
+ * vcpu->arch.shadow_table
+ * vcpu->arch.shadow_vtable
+ * Does all appropriate management/bookkeeping/refcounting/etc...
+ */
+static void
+sh_detach_old_tables(struct vcpu *v)
+{
+    mfn_t smfn;
+
+    ////
+    //// vcpu->arch.guest_vtable
+    ////
+    /* A mapping of the guest top-level table only exists for external
+     * (HVM) domains and for PAE guests; PV guests at other levels use
+     * the linear map and have nothing to unmap here. */
+    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
+         v->arch.guest_vtable )
+    {
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        sh_unmap_domain_page_global(v->arch.guest_vtable);
+        v->arch.guest_vtable = NULL;
+    }
+
+    ////
+    //// vcpu->arch.shadow_table
+    ////
+    smfn = pagetable_get_mfn(v->arch.shadow_table);
+    if ( mfn_x(smfn) )
+    {
+        ASSERT(v->arch.shadow_vtable);
+
+#if GUEST_PAGING_LEVELS == 3
+        // PAE guests do not (necessarily) use an entire page for their
+        // 4-entry L3s, so we have to deal with them specially.
+        //
+        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
+#else
+        sh_put_ref(v, smfn, 0);
+#endif
+
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+        {
+            /* Drop this vcpu from the set of users of the l3 subshadow
+             * (the mirror of the set_bit done in sh_update_cr3) */
+            struct pae_l3_bookkeeping *info =
+                sl3p_to_info(v->arch.shadow_vtable);
+            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
+            clear_bit(v->vcpu_id, &info->vcpus);
+        }
+#endif
+        v->arch.shadow_table = pagetable_null();
+    }
+
+    ////
+    //// vcpu->arch.shadow_vtable
+    ////
+    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
+         v->arch.shadow_vtable )
+    {
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        //
+        sh_unmap_domain_page_global(v->arch.shadow_vtable);
+        v->arch.shadow_vtable = NULL;
+    }
+}
+
+static void
+sh_update_cr3(struct vcpu *v)
+/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
+ * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+ * if appropriate).
+ * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
+ */
+{
+    struct domain *d = v->domain;
+    mfn_t gmfn, smfn;
+#if GUEST_PAGING_LEVELS == 3
+    u32 guest_idx=0;
+#endif
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(v->arch.shadow.mode);
+
+    ////
+    //// vcpu->arch.guest_table is already set
+    ////
+
+#ifndef NDEBUG
+    /* Double-check that the HVM code has sent us a sane guest_table */
+    if ( hvm_guest(v) )
+    {
+        gfn_t gfn;
+
+        ASSERT(shadow_mode_external(d));
+
+        // Is paging enabled on this vcpu?
+        if ( shadow_vcpu_mode_translate(v) )
+        {
+            gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3)));
+            gmfn = vcpu_gfn_to_mfn(v, gfn);
+            ASSERT(valid_mfn(gmfn));
+            ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
+        }
+        else
+        {
+            /* Paging disabled: guest_table points at (part of) p2m */
+#if SHADOW_PAGING_LEVELS != 3 /* in 3-on-4, guest-table is in slot 0 of p2m */
+            /* For everything else, they should be the same */
+            ASSERT(v->arch.guest_table.pfn == d->arch.phys_table.pfn);
+#endif
+        }
+    }
+#endif
+
+    SHADOW_PRINTK("d=%u v=%u guest_table=%05lx\n",
+                   d->domain_id, v->vcpu_id,
+                   (unsigned long)pagetable_get_pfn(v->arch.guest_table));
+
+#if GUEST_PAGING_LEVELS == 4
+    /* 64bit PV guests have separate kernel- and user-mode top-level
+     * tables; pick the one for the mode the vcpu is currently in. */
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        gmfn = pagetable_get_mfn(v->arch.guest_table_user);
+    else
+#endif
+        gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+    sh_detach_old_tables(v);
+
+    if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        ASSERT(v->arch.cr3 == 0);
+        return;
+    }
+
+    ////
+    //// vcpu->arch.guest_vtable
+    ////
+    if ( shadow_mode_external(d) )
+    {
+#if GUEST_PAGING_LEVELS == 3
+        if ( shadow_vcpu_mode_translate(v) )
+            /* Paging enabled: find where in the page the l3 table is */
+            guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
+        else
+            /* Paging disabled: l3 is at the start of a page (in the p2m) */
+            guest_idx = 0;
+
+        // Ignore the low 2 bits of guest_idx -- they are really just
+        // cache control.
+        guest_idx &= ~3;
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable =
+            (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx;
+#else
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#endif
+    }
+    else
+    {
+#ifdef __x86_64__
+        v->arch.guest_vtable = __linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#else
+        v->arch.guest_vtable = __linear_l2_table;
+#endif
+    }
+
+#if 0
+    printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
+           __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable);
+#endif
+
+    ////
+    //// vcpu->arch.shadow_table
+    ////
+    /* Look for an existing root shadow of this guest table; make one
+     * (revoking guest write access first) if there isn't one. */
+    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
+    if ( valid_mfn(smfn) )
+    {
+        /* Pull this root shadow to the front of the list of roots. */
+        list_del(&mfn_to_page(smfn)->list);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    else
+    {
+        /* This guest MFN is a pagetable. Must revoke write access. */
+        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0)
+             != 0 )
+            flush_tlb_mask(d->domain_dirty_cpumask);
+        /* Make sure there's enough free shadow memory. */
+        shadow_prealloc(d, SHADOW_MAX_ORDER);
+        /* Shadow the page. */
+        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    ASSERT(valid_mfn(smfn));
+    v->arch.shadow_table = pagetable_from_mfn(smfn);
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    /* Once again OK to unhook entries from this table if we see fork/exit */
+    ASSERT(sh_mfn_is_a_page_table(gmfn));
+    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
+#endif
+
+
+    ////
+    //// vcpu->arch.shadow_vtable
+    ////
+    if ( shadow_mode_external(d) )
+    {
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+        mfn_t adjusted_smfn = smfn;
+        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        v->arch.shadow_vtable =
+            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
+            shadow_idx;
+#else
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#endif
+    }
+    else
+    {
+#if SHADOW_PAGING_LEVELS == 4
+        v->arch.shadow_vtable = __sh_linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
+        // XXX - why does this need a global map?
+        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#else
+        v->arch.shadow_vtable = __sh_linear_l2_table;
+#endif
+    }
+
+    ////
+    //// Take a ref to the new shadow table, and pin it.
+    ////
+    //
+    // This ref is logically "held" by v->arch.shadow_table entry itself.
+    // Release the old ref.
+    //
+#if GUEST_PAGING_LEVELS == 3
+    // PAE guests do not (necessarily) use an entire page for their
+    // 4-entry L3s, so we have to deal with them specially.
+    //
+    // XXX - might want to revisit this if/when we do multiple compilation for
+    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
+    //       subshadows.
+    //
+    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
+    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
+#else
+    sh_get_ref(smfn, 0);
+    sh_pin(smfn);
+#endif
+
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+    // PAE 3-on-3 shadows have to keep track of which vcpu's are using
+    // which l3 subshadow, in order to handle the SHADOW_SET_L3PAE_RECOPY
+    // case from validate_gl3e(). Search for SHADOW_SET_L3PAE_RECOPY
+    // in the code for more info.
+    //
+    {
+        struct pae_l3_bookkeeping *info =
+            sl3p_to_info(v->arch.shadow_vtable);
+        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
+        set_bit(v->vcpu_id, &info->vcpus);
+    }
+#endif
+
+    // NOTE(review): gmfn/smfn are mfn_t here but are printed with %05lx;
+    // this assumes mfn_t is (or prints as) unsigned long -- confirm the
+    // mfn_t representation under the TYPE_SAFE build option.
+    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
+                      __func__, gmfn, smfn);
+
+    ///
+    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
+    ///
+    if ( shadow_mode_external(d) )
+    {
+        ASSERT(hvm_guest(v));
+        make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+#if SHADOW_PAGING_LEVELS != 3
+#error unexpected combination of GUEST and SHADOW paging levels
+#endif
+        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
+        {
+            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
+            int i;
+
+            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
+                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
+            for (i = 0; i < 4; i++)
+            {
+                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
+                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
+            }
+        }
+#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
+         * If paging is disabled, clear l3e reserved bits; otherwise
+         * remove entries that have reserved bits set. */
+        v->arch.hvm_vcpu.hw_cr3 =
+            hvm_pae_copy_root(v, v->arch.shadow_vtable,
+                              !shadow_vcpu_mode_translate(v));
+#else
+        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
+        v->arch.hvm_vcpu.hw_cr3 =
+            pagetable_get_paddr(v->arch.shadow_table);
+#endif
+    }
+    else // not shadow_mode_external...
+    {
+        /* We don't support PV except guest == shadow == config levels */
+        BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
+        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
+    }
+
+    /* Fix up the linear pagetable mappings */
+    sh_update_linear_entries(v);
+}
+
+
+/**************************************************************************/
+/* Functions to revoke guest rights */
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
+/* Look up this vaddr in the current shadow and see if it's a writeable
+ * mapping of this gmfn. If so, remove it. Returns 1 if it worked. */
+{
+    shadow_l1e_t sl1e, *sl1p;
+    shadow_l2e_t *sl2p;
+#if GUEST_PAGING_LEVELS >= 3
+    shadow_l3e_t *sl3p;
+#if GUEST_PAGING_LEVELS >= 4
+    shadow_l4e_t *sl4p;
+#endif
+#endif
+    mfn_t sl1mfn;
+
+
+    /* Carefully look in the shadow linear map for the l1e we expect */
+    /* Walk down from the top shadow level, bailing out (returning 0,
+     * i.e. "guess failed") as soon as any level is not present. */
+    if ( v->arch.shadow_vtable == NULL ) return 0;
+#if GUEST_PAGING_LEVELS >= 4
+    sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
+    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
+        return 0;
+    sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
+    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
+        return 0;
+#elif GUEST_PAGING_LEVELS == 3
+    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable)
+        + shadow_l3_linear_offset(vaddr);
+    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
+        return 0;
+#endif
+    sl2p = sh_linear_l2_table(v) + shadow_l2_linear_offset(vaddr);
+    if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) )
+        return 0;
+    sl1p = sh_linear_l1_table(v) + shadow_l1_linear_offset(vaddr);
+    sl1e = *sl1p;
+    /* The guess only succeeds if the l1e is a present, writeable
+     * mapping of exactly the gmfn we were asked about */
+    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
+          != (_PAGE_PRESENT|_PAGE_RW))
+         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
+        return 0;
+
+    /* Found it! Need to remove its write permissions. */
+    sl1mfn = shadow_l2e_get_mfn(*sl2p);
+    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
+    shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
+    return 1;
+}
+#endif
+
+int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
+/* Excises all writeable mappings to readonly_mfn from this l1 shadow table.
+ * Returns nonzero iff it stopped early because the target page's type
+ * count fell to zero (i.e. no writeable mappings remain anywhere). */
+{
+    shadow_l1e_t *sl1e;
+    int done = 0;
+    int flags;
+
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
+    {
+        flags = shadow_l1e_get_flags(*sl1e);
+        if ( (flags & _PAGE_PRESENT)
+             && (flags & _PAGE_RW)
+             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
+        {
+            shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+            if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
+                  & PGT_count_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+
+int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
+/* Excises all mappings to guest frame from this shadow l1 table.
+ * Returns nonzero iff it stopped early because the target page's
+ * general reference count fell to zero. */
+{
+    shadow_l1e_t *sl1e;
+    int done = 0;
+    int flags;
+
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
+    {
+        flags = shadow_l1e_get_flags(*sl1e);
+        if ( (flags & _PAGE_PRESENT)
+             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
+        {
+            shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+            if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+/**************************************************************************/
+/* Functions to excise all pointers to shadows from higher-level shadows. */
+
+void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
+/* Blank out a single shadow entry.  Dispatches on the shadow type
+ * recorded in the page's count_info so the right-sized empty entry
+ * is written with the right set-function (which handles refcounts). */
+{
+    switch (mfn_to_page(smfn)->count_info & PGC_SH_type_mask)
+    {
+    case PGC_SH_l1_shadow:
+        shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
+    case PGC_SH_l2_shadow:
+#if GUEST_PAGING_LEVELS == 3
+    case PGC_SH_l2h_shadow:
+#endif
+        shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
+#if GUEST_PAGING_LEVELS >= 3
+    case PGC_SH_l3_shadow:
+        shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
+#if GUEST_PAGING_LEVELS >= 4
+    case PGC_SH_l4_shadow:
+        shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
+#endif
+#endif
+    default: BUG(); /* Called with the wrong kind of shadow. */
+    }
+}
+
+int sh_remove_l1_shadow(struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn)
+/* Remove all mappings of this l1 shadow from this l2 shadow.
+ * Returns nonzero iff it stopped early because the l1 shadow's type
+ * field was cleared (i.e. the shadow page has been freed). */
+{
+    shadow_l2e_t *sl2e;
+    int done = 0;
+    int flags;
+#if GUEST_PAGING_LEVELS != 4
+    /* Skip the Xen-private mappings in non-external l2s */
+    int xen_mappings = !shadow_mode_external(v->domain);
+#endif
+
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, done, xen_mappings,
+    {
+        flags = shadow_l2e_get_flags(*sl2e);
+        if ( (flags & _PAGE_PRESENT)
+             && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
+        {
+            shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+            if ( (mfn_to_page(sl1mfn)->count_info & PGC_SH_type_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+#if GUEST_PAGING_LEVELS >= 3
+int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
+/* Remove all mappings of this l2 shadow from this l3 shadow.
+ * Returns nonzero iff it stopped early because the l2 shadow's type
+ * field was cleared. */
+{
+    shadow_l3e_t *sl3e;
+    int done = 0;
+    int flags;
+
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, done,
+    {
+        flags = shadow_l3e_get_flags(*sl3e);
+        if ( (flags & _PAGE_PRESENT)
+             && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
+        {
+            shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
+            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+#if GUEST_PAGING_LEVELS >= 4
+int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
+/* Remove all mappings of this l3 shadow from this l4 shadow.
+ * Returns nonzero iff it stopped early because the l3 shadow's type
+ * field was cleared. */
+{
+    shadow_l4e_t *sl4e;
+    int done = 0;
+    /* Skip the Xen-private mappings in non-external l4s */
+    int flags, xen_mappings = !shadow_mode_external(v->domain);
+
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, done, xen_mappings,
+    {
+        flags = shadow_l4e_get_flags(*sl4e);
+        if ( (flags & _PAGE_PRESENT)
+             && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
+        {
+            shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+            if ( (mfn_to_page(sl3mfn)->count_info & PGC_SH_type_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+#endif /* 64bit guest */
+#endif /* PAE guest */
+
+/**************************************************************************/
+/* Handling HVM guest writes to pagetables */
+
+/* Check that the user is allowed to perform this write.
+ * Returns a mapped pointer to write to, and the mfn it's on,
+ * or NULL for error. */
+static inline void * emulate_map_dest(struct vcpu *v,
+                                      unsigned long vaddr,
+                                      struct x86_emulate_ctxt *ctxt,
+                                      mfn_t *mfnp)
+{
+    walk_t gw;
+    u32 flags;
+    gfn_t gfn;
+    mfn_t mfn;
+
+    /* Walk the guest pagetables to find the mapping and its permissions;
+     * the '1' asks for a walk suitable for a write access */
+    guest_walk_tables(v, vaddr, &gw, 1);
+    flags = accumulate_guest_flags(&gw);
+    gfn = guest_l1e_get_gfn(gw.eff_l1e);
+    mfn = vcpu_gfn_to_mfn(v, gfn);
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+
+    /* The write must be to a present, writeable mapping, and if the
+     * emulated access came from ring 3 it must be user-accessible too */
+    if ( !(flags & _PAGE_PRESENT)
+         || !(flags & _PAGE_RW)
+         || (!(flags & _PAGE_USER) && ring_3(ctxt->regs)) )
+    {
+        /* This write would have faulted even on bare metal */
+        v->arch.shadow.propagate_fault = 1;
+        return NULL;
+    }
+
+    if ( !valid_mfn(mfn) )
+    {
+        /* Attempted a write to a bad gfn. This should never happen:
+         * after all, we're here because this write is to a page table. */
+        BUG();
+    }
+
+    ASSERT(sh_mfn_is_a_page_table(mfn));
+    *mfnp = mfn;
+    /* Caller must sh_unmap_domain_page() the returned pointer */
+    return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
+}
+
+int
+sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
+                      u32 bytes, struct x86_emulate_ctxt *ctxt)
+/* Perform an emulated write of 'bytes' bytes from 'src' to guest
+ * virtual address 'vaddr', propagating the change into the shadows.
+ * Splits the write at page boundaries, since each page must be mapped
+ * and validated separately.
+ * Returns X86EMUL_CONTINUE on success, or X86EMUL_PROPAGATE_FAULT if
+ * the guest mapping would have faulted on real hardware. */
+{
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    while ( bytes > 0 )
+    {
+        mfn_t mfn;
+        int bytes_on_page;
+        void *addr;
+
+        bytes_on_page = PAGE_SIZE - (vaddr & ~PAGE_MASK);
+        if ( bytes_on_page > bytes )
+            bytes_on_page = bytes;
+
+        if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
+            return X86EMUL_PROPAGATE_FAULT;
+        memcpy(addr, src, bytes_on_page);
+        shadow_validate_guest_pt_write(v, mfn, addr, bytes_on_page);
+        /* If we are writing zeros to this page, might want to unshadow */
+        if ( *(u8 *)addr == 0 )
+            check_for_early_unshadow(v, mfn);
+        sh_unmap_domain_page(addr);
+        /* Advance to the next page's worth of data.  Without these
+         * increments a write that crossed a page boundary would re-map
+         * the first page and re-copy the first chunk forever. */
+        bytes -= bytes_on_page;
+        vaddr += bytes_on_page;
+        src = (u8 *)src + bytes_on_page;
+    }
+    shadow_audit_tables(v);
+    return X86EMUL_CONTINUE;
+}
+
+int
+sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
+                        unsigned long old, unsigned long new,
+                        unsigned int bytes, struct x86_emulate_ctxt *ctxt)
+/* Perform an emulated CMPXCHG of up to sizeof(unsigned long) bytes at
+ * guest virtual address 'vaddr', propagating a successful exchange into
+ * the shadows.  Returns X86EMUL_CONTINUE, X86EMUL_CMPXCHG_FAILED if the
+ * comparison failed, or X86EMUL_PROPAGATE_FAULT on a bad mapping. */
+{
+    mfn_t mfn;
+    void *addr;
+    unsigned long prev;
+    int rv = X86EMUL_CONTINUE;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(bytes <= sizeof (unsigned long));
+
+    if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
+        return X86EMUL_PROPAGATE_FAULT;
+
+    switch (bytes)
+    {
+    case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
+    case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
+    case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
+    case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
+    default:
+        SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes);
+        prev = ~old; /* Guarantee the comparison below fails */
+    }
+
+    if ( (prev == old)  )
+        shadow_validate_guest_pt_write(v, mfn, addr, bytes);
+    else
+        rv = X86EMUL_CMPXCHG_FAILED;
+
+    SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
+                  " wanted %#lx now %#lx bytes %u\n",
+                  vaddr, prev, old, new, *(unsigned long *)addr, bytes);
+
+    /* If we are writing zeros to this page, might want to unshadow */
+    if ( *(u8 *)addr == 0 )
+        check_for_early_unshadow(v, mfn);
+
+    sh_unmap_domain_page(addr);
+    shadow_audit_tables(v);
+    /* Fix: dropped a second, unconditional check_for_early_unshadow()
+     * call that duplicated the zero-byte-guarded one above. */
+    return rv;
+}
+
+int
+sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr,
+                          unsigned long old_lo, unsigned long old_hi,
+                          unsigned long new_lo, unsigned long new_hi,
+                          struct x86_emulate_ctxt *ctxt)
+/* Perform an emulated CMPXCHG8B at guest virtual address 'vaddr',
+ * propagating a successful exchange into the shadows.  The 64-bit old
+ * and new values arrive as lo/hi halves, matching the instruction's
+ * EDX:EAX / ECX:EBX operands.  Returns X86EMUL_CONTINUE,
+ * X86EMUL_CMPXCHG_FAILED, or X86EMUL_PROPAGATE_FAULT. */
+{
+    mfn_t mfn;
+    void *addr;
+    u64 old, new, prev;
+    int rv = X86EMUL_CONTINUE;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
+        return X86EMUL_PROPAGATE_FAULT;
+
+    old = (((u64) old_hi) << 32) | (u64) old_lo;
+    new = (((u64) new_hi) << 32) | (u64) new_lo;
+    prev = cmpxchg(((u64 *)addr), old, new);
+
+    if ( (prev == old)  )
+        shadow_validate_guest_pt_write(v, mfn, addr, 8);
+    else
+        rv = X86EMUL_CMPXCHG_FAILED;
+
+    /* If we are writing zeros to this page, might want to unshadow */
+    if ( *(u8 *)addr == 0 )
+        check_for_early_unshadow(v, mfn);
+
+    sh_unmap_domain_page(addr);
+    shadow_audit_tables(v);
+    /* Fix: dropped a second, unconditional check_for_early_unshadow()
+     * call that duplicated the zero-byte-guarded one above. */
+    return rv;
+}
+
+
+/**************************************************************************/
+/* Audit tools */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+
+/* Report an audit failure: dump the guest/shadow mfns, entry pointers
+ * and entry values for the offending level, then BUG().  Relies on the
+ * gl<N>e/sl<N>e/gl<N>mfn/sl<N>mfn naming convention used by the
+ * sh_audit_l<N>_table functions below.  (done = 1 after BUG() is only
+ * there to satisfy the FOREACH macros' early-exit protocol.) */
+#define AUDIT_FAIL(_level, _fmt, _a...) do {                               \
+    printk("Shadow %u-on-%u audit failed at level %i, index %i\n"         \
+           "gl" #_level "mfn = %" SH_PRI_mfn                              \
+           " sl" #_level "mfn = %" SH_PRI_mfn                             \
+           " &gl" #_level "e = %p &sl" #_level "e = %p"                   \
+           " gl" #_level "e = %" SH_PRI_gpte                              \
+           " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n",        \
+           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                     \
+           _level, guest_index(gl ## _level ## e),                        \
+           mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),        \
+           gl ## _level ## e, sl ## _level ## e,                          \
+           gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
+           ##_a);                                                         \
+    BUG();                                                                \
+    done = 1;                                                             \
+} while (0)
+
+
+static char * sh_audit_flags(struct vcpu *v, int level,
+                              int gflags, int sflags)
+/* Common code for auditing flag bits.  Compares a shadow entry's flags
+ * against the corresponding guest entry's flags and returns a static
+ * error string describing the first mismatch found, or NULL if the
+ * flags are consistent. */
+{
+    if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
+        return "shadow is present but guest is not present";
+    if ( (sflags & _PAGE_GLOBAL) && !hvm_guest(v) )
+        return "global bit set in PV shadow";
+    /* Dirty propagation only applies at the level of the final mapping:
+     * l1 entries, or l2 superpage (PSE) entries */
+    if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE)))
+         && ((sflags & _PAGE_DIRTY) && !(gflags & _PAGE_DIRTY)) )
+        return "dirty bit not propagated";
+    if ( level == 2 && (sflags & _PAGE_PSE) )
+        return "PS bit set in shadow";
+#if SHADOW_PAGING_LEVELS == 3
+    if ( level == 3 ) return NULL; /* All the other bits are blank in PAEl3 */
+#endif
+    if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) )
+        return "user/supervisor bit does not match";
+    if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) )
+        return "NX bit does not match";
+    if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) )
+        return "shadow grants write access but guest does not";
+    if ( (sflags & _PAGE_ACCESSED) && !(gflags & _PAGE_ACCESSED) )
+        return "accessed bit not propagated";
+    return NULL;
+}
+
+static inline mfn_t
+audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
+/* Convert this gfn to an mfn in the manner appropriate for the
+ * guest pagetable it's used in (gmfn) */
+{
+    /* Non-translated guests use mfns directly in their pagetables */
+    if ( !shadow_mode_translate(v->domain) )
+        return _mfn(gfn_x(gfn));
+
+    if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
+         != PGT_writable_page )
+        return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
+    else
+        return sh_gfn_to_mfn(v->domain, gfn_x(gfn));
+}
+
+
+int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
+/* Audit one l1 shadow against its guest l1: check flags and, if the
+ * MFN-audit is enabled, the gfn->mfn translations.  AUDIT_FAIL BUG()s
+ * on any mismatch. */
+{
+    guest_l1e_t *gl1e, *gp;
+    shadow_l1e_t *sl1e;
+    mfn_t mfn, gmfn, gl1mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+
+    /* Follow the backpointer */
+    gl1mfn = _mfn(mfn_to_page(sl1mfn)->u.inuse.type_info);
+    gl1e = gp = sh_map_domain_page(gl1mfn);
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
+
+        s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
+                            shadow_l1e_get_flags(*sl1e));
+        if ( s ) AUDIT_FAIL(1, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l1e_get_gfn(*gl1e);
+            mfn = shadow_l1e_get_mfn(*sl1e);
+            gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return done;
+}
+
+int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
+/* Audit a "fake" l1 shadow (one that shadows a superpage mapping and so
+ * has no real guest l1 behind it).  Only the flag patterns can be
+ * checked.  Always returns 0: AUDIT_FAIL BUG()s before 'done' matters. */
+{
+    guest_l1e_t *gl1e, e;
+    shadow_l1e_t *sl1e;
+    mfn_t gl1mfn = _mfn(INVALID_MFN);
+    int f;
+    int done = 0;
+
+    /* fl1 has no useful backpointer: all we can check are flags */
+    e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, {
+        f = shadow_l1e_get_flags(*sl1e);
+        /* AVAIL bits are software-defined, so ignore them */
+        f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2);
+        if ( !(f == 0
+               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                        _PAGE_ACCESSED|_PAGE_DIRTY)
+               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)) )
+            AUDIT_FAIL(1, "fl1e has bad flags");
+    });
+    return 0;
+}
+
+int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
+/* Audit one l2 shadow against its guest l2.  For PSE guest entries the
+ * expected shadow target is the fl1 shadow of the superpage's gfn;
+ * otherwise it is the l1 shadow of the guest l1's mfn. */
+{
+    guest_l2e_t *gl2e, *gp;
+    shadow_l2e_t *sl2e;
+    mfn_t mfn, gmfn, gl2mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+#if GUEST_PAGING_LEVELS != 4
+    /* Skip the Xen-private mappings in non-external l2s */
+    int xen_mappings = !shadow_mode_external(v->domain);
+#endif
+
+    /* Follow the backpointer */
+    gl2mfn = _mfn(mfn_to_page(sl2mfn)->u.inuse.type_info);
+    gl2e = gp = sh_map_domain_page(gl2mfn);
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, xen_mappings, {
+
+        s = sh_audit_flags(v, 2, guest_l2e_get_flags(*gl2e),
+                            shadow_l2e_get_flags(*sl2e));
+        if ( s ) AUDIT_FAIL(2, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l2e_get_gfn(*gl2e);
+            mfn = shadow_l2e_get_mfn(*sl2e);
+            gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
+                ? get_fl1_shadow_status(v, gfn)
+                : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn),
+                                    PGC_SH_l1_shadow);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
+                           " (--> %" SH_PRI_mfn ")"
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn),
+                           (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
+                           : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
+                           mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return 0;
+}
+
+#if GUEST_PAGING_LEVELS >= 3
+int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
+/* Audit one l3 shadow against its guest l3.  For PAE PV guests the
+ * fourth l3 slot shadows the Xen-private l2h type rather than a plain
+ * l2 shadow. */
+{
+    guest_l3e_t *gl3e, *gp;
+    shadow_l3e_t *sl3e;
+    mfn_t mfn, gmfn, gl3mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+
+    /* Follow the backpointer */
+    gl3mfn = _mfn(mfn_to_page(sl3mfn)->u.inuse.type_info);
+    gl3e = gp = sh_map_domain_page(gl3mfn);
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
+
+        s = sh_audit_flags(v, 3, guest_l3e_get_flags(*gl3e),
+                            shadow_l3e_get_flags(*sl3e));
+        if ( s ) AUDIT_FAIL(3, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l3e_get_gfn(*gl3e);
+            mfn = shadow_l3e_get_mfn(*sl3e);
+            gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn),
+                                     (GUEST_PAGING_LEVELS == 3
+                                      && !shadow_mode_external(v->domain)
+                                      && (guest_index(gl3e) % 4) == 3)
+                                     ? PGC_SH_l2h_pae_shadow
+                                     : PGC_SH_l2_shadow);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return 0;
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+#if GUEST_PAGING_LEVELS >= 4
+int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
+/* Audit one l4 shadow against its guest l4: each present shadow entry
+ * must point at the l3 shadow of the mfn named by the guest entry. */
+{
+    guest_l4e_t *gl4e, *gp;
+    shadow_l4e_t *sl4e;
+    mfn_t mfn, gmfn, gl4mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+    /* Skip the Xen-private mappings in non-external l4s */
+    int xen_mappings = !shadow_mode_external(v->domain);
+
+    /* Follow the backpointer */
+    gl4mfn = _mfn(mfn_to_page(sl4mfn)->u.inuse.type_info);
+    gl4e = gp = sh_map_domain_page(gl4mfn);
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, xen_mappings,
+    {
+        s = sh_audit_flags(v, 4, guest_l4e_get_flags(*gl4e),
+                            shadow_l4e_get_flags(*sl4e));
+        if ( s ) AUDIT_FAIL(4, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l4e_get_gfn(*gl4e);
+            mfn = shadow_l4e_get_mfn(*sl4e);
+            gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn),
+                                     PGC_SH_l3_shadow);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return 0;
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#undef AUDIT_FAIL
+
+#endif /* Audit code */
+
+/**************************************************************************/
+/* Entry points into this mode of the shadow code.
+ * This will all be mangled by the preprocessor to uniquify everything. */
+struct shadow_paging_mode sh_paging_mode = {
+    /* Fault handling and guest-virtual address translation */
+    .page_fault = sh_page_fault,
+    .invlpg = sh_invlpg,
+    .gva_to_gpa = sh_gva_to_gpa,
+    .gva_to_gfn = sh_gva_to_gfn,
+    .update_cr3 = sh_update_cr3,
+    /* Resynchronising shadows after guest pagetable writes */
+    .map_and_validate_gl1e = sh_map_and_validate_gl1e,
+    .map_and_validate_gl2e = sh_map_and_validate_gl2e,
+    .map_and_validate_gl2he = sh_map_and_validate_gl2he,
+    .map_and_validate_gl3e = sh_map_and_validate_gl3e,
+    .map_and_validate_gl4e = sh_map_and_validate_gl4e,
+    .detach_old_tables = sh_detach_old_tables,
+    /* Emulated writes to guest pagetables */
+    .x86_emulate_write = sh_x86_emulate_write,
+    .x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg,
+    .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b,
+    /* Monitor (hypervisor-side) pagetable lifecycle */
+    .make_monitor_table = sh_make_monitor_table,
+    .destroy_monitor_table = sh_destroy_monitor_table,
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    .guess_wrmap = sh_guess_wrmap,
+#endif
+    /* The (guest, shadow) level pair this compilation implements */
+    .guest_levels = GUEST_PAGING_LEVELS,
+    .shadow_levels = SHADOW_PAGING_LEVELS,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+/******************************************************************************
+ * arch/x86/mm/shadow/multi.h
+ *
+ * Shadow declarations which will be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Resync shadow state after the guest writes [new_glNp, new_glNp+size)
+ * into the pagetable page at glNmfn. */
+extern int
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl1mfn, void *new_gl1p, u32 size);
+extern int
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
+extern int
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
+extern int
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl3mfn, void *new_gl3p, u32 size);
+extern int
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl4mfn, void *new_gl4p, u32 size);
+
+/* Tear down a single shadow page of the given level. */
+extern void
+SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+
+/* PAE-only (3-on-3) helper: note the fixed level arguments. */
+extern void
+SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
+    (struct vcpu *v, mfn_t smfn);
+
+/* Remove the guest-visible mappings from a top-level shadow, one
+ * variant per guest pagetable flavour. */
+extern void
+SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl2mfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl3mfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl4mfn);
+
+/* Strip mappings of a target frame out of an L1 shadow. */
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
+
+/* Blank one shadow entry (ep points into the shadow page smfn). */
+extern void
+SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, void *ep, mfn_t smfn);
+
+/* Remove a lower-level shadow's entry from its parent shadow. */
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn);
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn);
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn);
+
+/* Consistency checkers, compiled in only when entry auditing is on. */
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+int
+SHADOW_INTERNAL_NAME(sh_audit_l1_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
+int
+SHADOW_INTERNAL_NAME(sh_audit_fl1_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
+int
+SHADOW_INTERNAL_NAME(sh_audit_l2_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl2mfn, mfn_t x);
+int
+SHADOW_INTERNAL_NAME(sh_audit_l3_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl3mfn, mfn_t x);
+int
+SHADOW_INTERNAL_NAME(sh_audit_l4_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl4mfn, mfn_t x);
+#endif
+
+/* Monitor tables exist only where shadow and guest levels coincide. */
+#if SHADOW_LEVELS == GUEST_LEVELS
+extern mfn_t
+SHADOW_INTERNAL_NAME(sh_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v);
+extern void
+SHADOW_INTERNAL_NAME(sh_destroy_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t mmfn);
+#endif
+
+/* The per-(guest,shadow)-levels entry-point table for this flavour. */
+extern struct shadow_paging_mode
+SHADOW_INTERNAL_NAME(sh_paging_mode, SHADOW_LEVELS, GUEST_LEVELS);
--- /dev/null
+
+#ifndef __X86_PAGE_GUEST_H__
+#define __X86_PAGE_GUEST_H__
+
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
+
+#define PAGETABLE_ORDER_32 10
+#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
+#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
+#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32
+
+
+#define L1_PAGETABLE_SHIFT_32 12
+#define L2_PAGETABLE_SHIFT_32 22
+
+/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
+
+#ifndef __ASSEMBLY__
+
+/* Raw 32-bit (2-level, non-PAE) guest pagetable entry. */
+typedef u32 intpte_32_t;
+
+typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
+typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
+/* NOTE(review): this aliases the *native* l2_pgentry_t rather than
+ * l2_pgentry_32_t -- confirm that is intentional. */
+typedef l2_pgentry_t root_pgentry_32_t;
+#endif
+
+#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
+#define put_pte_flags_32(x) ((intpte_32_t)(x))
+
+/* Get pte access flags (unsigned int). */
+#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1))
+#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2))
+
+#define l1e_get_paddr_32(x) \
+ ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_paddr_32(x) \
+ ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
+
+/* Construct an empty pte. */
+#define l1e_empty_32() ((l1_pgentry_32_t) { 0 })
+#define l2e_empty_32() ((l2_pgentry_32_t) { 0 })
+
+/* Construct a pte from a pfn and access flags. */
+#define l1e_from_pfn_32(pfn, flags) \
+ ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+#define l2e_from_pfn_32(pfn, flags) \
+ ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+
+/* Construct a pte from a physical address and access flags. */
+#ifndef __ASSEMBLY__
+/* Build an L1 entry from a physical address and access flags.  The
+ * address must carry no bits outside PADDR_MASK & PAGE_MASK (i.e. it
+ * must be page-aligned and within the supported physical range). */
+static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+    return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+/* As above, for an L2 entry. */
+static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+    return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+#endif /* !__ASSEMBLY__ */
+
+
+/* Construct a pte from a page pointer and access flags. */
+#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
+#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
+
+/* Add extra flags to an existing pte. */
+#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags))
+#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags))
+
+/* Remove flags from an existing pte. */
+#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
+#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
+
+/* Check if a pte's page mapping or significant access flags have changed. */
+#define l1e_has_changed_32(x,y,flags) \
+ ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+#define l2e_has_changed_32(x,y,flags) \
+ ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset_32(a) \
+ (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
+#define l2_table_offset_32(a) \
+ (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
+
+#define linear_l1_table_32 \
+ ((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START))
+
+#define linear_pg_table_32 linear_l1_table_32
+
+#endif /* __X86_PAGE_GUEST_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+/******************************************************************************
+ * arch/x86/mm/shadow/private.h
+ *
+ * Shadow code that is private, and does not need to be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW_PRIVATE_H
+#define _XEN_SHADOW_PRIVATE_H
+
+// In order to override the definition of mfn_to_page, we make sure page.h has
+// been included...
+#include <asm/page.h>
+#include <xen/domain_page.h>
+#include <asm/x86_emulate.h>
+#include <asm/hvm/support.h>
+
+
+/******************************************************************************
+ * Definitions for the use of the "available" bits in the shadow PTEs.
+ *
+ * Review of the low 12 bits of a shadow page table entry:
+ *
+ * in a guest: in a shadow:
+ * Bit 11: _PAGE_AVAIL2, aka _PAGE_GNTTAB
+ * Bit 10: _PAGE_AVAIL1 _PAGE_SHADOW_RW ("SW" below)
+ * Bit 9: _PAGE_AVAIL0 _PAGE_SHADOW_PRESENT ("SP" below)
+ * Bit 8: _PAGE_GLOBAL _PAGE_SHADOW_MMIO ("MMIO" below),
+ * aka _PAGE_SHADOW_GUEST_NOT_PRESENT
+ * Bit 7: _PAGE_PSE, aka _PAGE_PAT
+ * Bit 6: _PAGE_DIRTY
+ * Bit 5: _PAGE_ACCESSED
+ * Bit 4: _PAGE_PCD
+ * Bit 3: _PAGE_PWT
+ * Bit 2: _PAGE_USER
+ * Bit 1: _PAGE_RW ("GW" below)
+ * Bit 0: _PAGE_PRESENT ("GP" below)
+ *
+ * Given a guest entry, as shown below, we can expect the following in the
+ * corresponding shadow entry:
+ *
+ * Guest entry Shadow entry Commentary
+ * ----------- ---------------- ---------------------------------------------
+ * Maps
+ * GP GW IO GP SP GW SW MMIO
+ * -- -- ---- -- -- -- -- ----
+ * - - - 0 0 0 0 0 The guest entry has not yet been shadowed.
+ * 0 - - 0 0 0 0 1 The guest entry is marked not-present.
+ * 1 1 no ? 1 ? 1 0 Writable entry in the guest.
+ * 1 0 no ? 1 0 0 0 Read-only entry in the guest.
+ * 1 1 yes 0 1 ? 1 1 Writable MMIO mapping in the guest.
+ * 1 0 yes 0 1 0 0 1 Read-only MMIO mapping in the guest.
+ *
+ * Normally, we would expect that GP=1 in the guest to imply GP=1 in the
+ * shadow, and similarly for GW=1. However, various functionality that may be
+ * implemented via the shadow can cause GP or GW to be cleared in such cases.
+ * A & D bit emulation is a prime example of such functionality.
+ *
+ * If _PAGE_SHADOW_PRESENT is zero, then the _PAGE_PRESENT bit in that same
+ * entry will always be zero, too.
+ *
+ * Bit 11 is used in debug builds as the _PAGE_GNTTAB bit in PV guests. It is
+ * currently available for random (ab)use in shadow entries.
+ *
+ * Bit 8 (the global bit) could be propagated from an HVM guest to the shadow,
+ * but currently there is no benefit, as the guest's TLB is flushed on every
+ * transition of CR3 anyway due to the HVM exit/re-entry.
+ *
+ * In shadow entries in which the _PAGE_SHADOW_PRESENT is set, bit 8 is used
+ * as the _PAGE_SHADOW_MMIO bit. In such entries, if _PAGE_SHADOW_MMIO is
+ * set, then the entry contains the *gfn* directly from the corresponding
+ * guest entry (not an mfn!!).
+ *
+ * Bit 7 is set in a guest L2 to signify a superpage entry. The current
+ * shadow code splinters superpage mappings into 512 or 1024 4K mappings; the
+ * resulting shadow L1 table is called an FL1. Note that there is no guest
+ * page that corresponds to an FL1.
+ *
+ * Bit 7 in a guest L1 is the PAT2 bit. Currently we do not support PAT in
+ * this shadow code.
+ *
+ * Bit 6 is the dirty bit.
+ *
+ * Bit 5 is the accessed bit.
+ *
+ * Bit 4 is the cache disable bit. If set in a guest, the hardware is
+ * supposed to refuse to cache anything found via this entry. It can be set
+ * in an L4e, L3e, L2e, or L1e. This shadow code currently does not support
+ * cache disable bits. They are silently ignored.
+ *
+ * Bit 4 in a guest L1 is also the PAT1 bit.  Currently we do not support PAT
+ * in this shadow code.
+ *
+ * Bit 3 is the cache write-thru bit. If set in a guest, the hardware is
+ * supposed to use write-thru instead of write-back caching for anything found
+ * via this entry. It can be set in an L4e, L3e, L2e, or L1e. This shadow
+ * code currently does not support cache write-thru bits. They are silently
+ * ignored.
+ *
+ * Bit 3 in a guest L1 is also the PAT0 bit.  Currently we do not support PAT
+ * in this shadow code.
+ *
+ * Bit 2 is the user bit.
+ *
+ * Bit 1 is the read-write bit.
+ *
+ * Bit 0 is the present bit.
+ */
+
+// Copy of the _PAGE_RW bit from the guest's PTE, appropriately zero'ed by
+// the appropriate shadow rules.
+#define _PAGE_SHADOW_RW _PAGE_AVAIL1
+
+// Copy of the _PAGE_PRESENT bit from the guest's PTE
+#define _PAGE_SHADOW_PRESENT _PAGE_AVAIL0
+
+// The matching guest entry maps MMIO space
+#define _PAGE_SHADOW_MMIO _PAGE_GLOBAL
+
+// Shadow flags value used when the guest is not present
+#define _PAGE_SHADOW_GUEST_NOT_PRESENT _PAGE_GLOBAL
+
+
+/******************************************************************************
+ * Debug and error-message output
+ */
+#define SHADOW_PRINTK(_f, _a...) \
+ debugtrace_printk("sh: %s(): " _f, __func__, ##_a)
+#define SHADOW_ERROR(_f, _a...) \
+ printk("sh error: %s(): " _f, __func__, ##_a)
+#define SHADOW_DEBUG(flag, _f, _a...) \
+ do { \
+ if (SHADOW_DEBUG_ ## flag) \
+ debugtrace_printk("shdebug: %s(): " _f, __func__, ##_a); \
+ } while (0)
+
+// The flags for use with SHADOW_DEBUG:
+#define SHADOW_DEBUG_PROPAGATE 0
+#define SHADOW_DEBUG_MAKE_SHADOW 0
+#define SHADOW_DEBUG_DESTROY_SHADOW 0
+#define SHADOW_DEBUG_P2M 0
+#define SHADOW_DEBUG_A_AND_D 0
+#define SHADOW_DEBUG_EMULATE 0
+#define SHADOW_DEBUG_LOGDIRTY 1
+
+
+/******************************************************************************
+ * Auditing routines
+ */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
+extern void shadow_audit_tables(struct vcpu *v);
+#else
+#define shadow_audit_tables(_v) do {} while(0)
+#endif
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_P2M
+extern void shadow_audit_p2m(struct domain *d);
+#else
+#define shadow_audit_p2m(_d) do {} while(0)
+#endif
+
+
+/******************************************************************************
+ * Mechanism for double-checking the optimized pagefault path: this
+ * structure contains a record of actions taken by the fault handling
+ * code. In paranoid mode, the fast-path code fills out one of these
+ * structures (but doesn't take any actual action) and then the normal
+ * path fills in another. When the fault handler finishes, the
+ * two are compared */
+
+#ifdef SHADOW_OPTIMIZATION_PARANOIA
+
+typedef struct shadow_action_log sh_log_t;
+struct shadow_action_log {
+ paddr_t ad[CONFIG_PAGING_LEVELS]; /* A & D bits propagated here */
+ paddr_t mmio; /* Address of an mmio operation */
+ int rv; /* Result of the fault handler */
+};
+
+/* There are two logs, one for the fast path, one for the normal path */
+enum sh_log_type { log_slow = 0, log_fast= 1 };
+
+/* Alloc and zero the logs */
+static inline void sh_init_log(struct vcpu *v)
+{
+    /* Lazily allocate the pair of logs (slow-path and fast-path), then
+     * zero both before a new fault is traced.
+     * NOTE(review): an xmalloc_array failure is only caught by the
+     * ASSERT; in non-debug builds the memset would dereference NULL --
+     * confirm this paranoia mode is debug-only. */
+    if ( unlikely(!v->arch.shadow.action_log) )
+        v->arch.shadow.action_log = xmalloc_array(sh_log_t, 2);
+    ASSERT(v->arch.shadow.action_log);
+    memset(v->arch.shadow.action_log, 0, 2 * sizeof (sh_log_t));
+}
+
+/* Log an A&D-bit update */
+static inline void sh_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
+{
+    /* Record the entry written for A/D propagation at this level. */
+    v->arch.shadow.action_log[v->arch.shadow.action_index].ad[level] = e;
+}
+
+/* Log an MMIO address */
+static inline void sh_log_mmio(struct vcpu *v, paddr_t m)
+{
+    v->arch.shadow.action_log[v->arch.shadow.action_index].mmio = m;
+}
+
+/* Log the result */
+static inline void sh_log_rv(struct vcpu *v, int rv)
+{
+    v->arch.shadow.action_log[v->arch.shadow.action_index].rv = rv;
+}
+
+/* Set which mode we're in */
+static inline void sh_set_log_mode(struct vcpu *v, enum sh_log_type t)
+{
+    /* Selects which of the two logs subsequent sh_log_* calls fill. */
+    v->arch.shadow.action_index = t;
+}
+
+/* Know not to take action, because we're only checking the mechanism */
+static inline int sh_take_no_action(struct vcpu *v)
+{
+    return (v->arch.shadow.action_index == log_fast);
+}
+
+#else /* Non-paranoid mode: these logs do not exist */
+
+#define sh_init_log(_v) do { (void)(_v); } while(0)
+#define sh_set_log_mode(_v,_t) do { (void)(_v); } while(0)
+#define sh_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
+#define sh_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
+#define sh_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
+#define sh_take_no_action(_v) (((void)(_v)), 0)
+
+#endif /* SHADOW_OPTIMIZATION_PARANOIA */
+
+
+/******************************************************************************
+ * Macro for dealing with the naming of the internal names of the
+ * shadow code's external entry points.
+ */
+#define SHADOW_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels) \
+ name ## __shadow_ ## shadow_levels ## _guest_ ## guest_levels
+#define SHADOW_INTERNAL_NAME(name, shadow_levels, guest_levels) \
+ SHADOW_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels)
+
+#if CONFIG_PAGING_LEVELS == 2
+#define GUEST_LEVELS 2
+#define SHADOW_LEVELS 2
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+#endif /* CONFIG_PAGING_LEVELS == 2 */
+
+#if CONFIG_PAGING_LEVELS == 3
+#define GUEST_LEVELS 2
+#define SHADOW_LEVELS 3
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 3
+#define SHADOW_LEVELS 3
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+#endif /* CONFIG_PAGING_LEVELS == 3 */
+
+#if CONFIG_PAGING_LEVELS == 4
+#define GUEST_LEVELS 2
+#define SHADOW_LEVELS 3
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 3
+#define SHADOW_LEVELS 3
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 3
+#define SHADOW_LEVELS 4
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 4
+#define SHADOW_LEVELS 4
+#include "multi.h"
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+
+/******************************************************************************
+ * Various function declarations
+ */
+
+/* x86 emulator support */
+extern struct x86_emulate_ops shadow_emulator_ops;
+
+/* Hash table functions */
+mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, u8 t);
+void shadow_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn);
+void shadow_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn);
+
+/* shadow promotion */
+void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type);
+void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type);
+
+/* Shadow page allocation functions */
+void shadow_prealloc(struct domain *d, unsigned int order);
+mfn_t shadow_alloc(struct domain *d,
+ u32 shadow_type,
+ unsigned long backpointer);
+void shadow_free(struct domain *d, mfn_t smfn);
+
+/* Function to convert a shadow to log-dirty */
+void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn);
+
+/* Dispatcher function: call the per-mode function that will unhook the
+ * non-Xen mappings in this top-level shadow mfn */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);
+
+/* Re-sync copies of PAE shadow L3 tables if they have been changed */
+void sh_pae_recopy(struct domain *d);
+
+/* Install the xen mappings in various flavours of shadow */
+void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
+void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
+void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
+void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
+
+
+/******************************************************************************
+ * MFN/page-info handling
+ */
+
+// Override mfn_to_page from asm/page.h, which was #include'd above,
+// in order to make it work with our mfn type.
+#undef mfn_to_page
+#define mfn_to_page(_mfn) (frame_table + mfn_x(_mfn))
+
+// Override page_to_mfn from asm/page.h, which was #include'd above,
+// in order to make it work with our mfn type.
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+
+// Override mfn_valid from asm/page.h, which was #include'd above,
+// in order to make it work with our mfn type.
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+
+// Provide mfn_t-aware versions of common xen functions
+static inline void *
+sh_map_domain_page(mfn_t mfn)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ return map_domain_page(mfn_x(mfn));
+}
+
+static inline void
+sh_unmap_domain_page(void *p)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ unmap_domain_page(p);
+}
+
+static inline void *
+sh_map_domain_page_global(mfn_t mfn)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ return map_domain_page_global(mfn_x(mfn));
+}
+
+static inline void
+sh_unmap_domain_page_global(void *p)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ unmap_domain_page_global(p);
+}
+
+static inline int
+sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
+/* Is this guest page dirty?  Call only in log-dirty mode. */
+{
+    unsigned long pfn;
+    ASSERT(shadow_mode_log_dirty(d));
+    ASSERT(d->arch.shadow.dirty_bitmap != NULL);
+
+    /* We /really/ mean PFN here, even for non-translated guests. */
+    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+    /* Out-of-range or invalid M2P entries are treated as clean. */
+    if ( likely(VALID_M2P(pfn))
+         && likely(pfn < d->arch.shadow.dirty_bitmap_size)
+         && test_bit(pfn, d->arch.shadow.dirty_bitmap) )
+        return 1;
+
+    return 0;
+}
+
+/* Does this mfn currently hold a guest pagetable?  For refcounted
+ * shadow domains this is tracked by PGC_page_table; otherwise fall
+ * back to the frame's pagetable type in type_info. */
+static inline int
+sh_mfn_is_a_page_table(mfn_t gmfn)
+{
+    /* mfn_to_page here is pure pointer arithmetic (frame_table + mfn),
+     * so computing it before the validity check does not dereference
+     * anything.
+     * NOTE(review): this uses valid_mfn() while the file defines its
+     * own mfn_valid() macro above -- confirm both exist and agree. */
+    struct page_info *page = mfn_to_page(gmfn);
+    struct domain *owner;
+    unsigned long type_info;
+
+    if ( !valid_mfn(gmfn) )
+        return 0;
+
+    owner = page_get_owner(page);
+    if ( owner && shadow_mode_refcounts(owner)
+         && (page->count_info & PGC_page_table) )
+        return 1;
+
+    /* PGT_l1..PGT_l4 are contiguous type codes starting just above 0. */
+    type_info = page->u.inuse.type_info & PGT_type_mask;
+    return type_info && (type_info <= PGT_l4_page_table);
+}
+
+
+/**************************************************************************/
+/* Shadow-page refcounting. See comment in shadow-common.c about the
+ * use of struct page_info fields for shadow pages */
+
+void sh_destroy_shadow(struct vcpu *v, mfn_t smfn);
+
+/* Increase the refcount of a shadow page. Arguments are the mfn to refcount,
+ * and the physical address of the shadow entry that holds the ref (or zero
+ * if the ref is held by something else) */
+static inline void sh_get_ref(mfn_t smfn, paddr_t entry_pa)
+{
+    u32 x, nx;
+    struct page_info *page = mfn_to_page(smfn);
+
+    ASSERT(mfn_valid(smfn));
+
+    /* Refcount lives in the low bits of count_info (PGC_SH_count_mask). */
+    x = page->count_info & PGC_SH_count_mask;
+    nx = x + 1;
+
+    /* Overflowing the count field is unrecoverable: kill the domain. */
+    if ( unlikely(nx & ~PGC_SH_count_mask) )
+    {
+        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
+                       page->u.inuse.type_info, mfn_x(smfn));
+        domain_crash_synchronous();
+    }
+
+    /* Guarded by the shadow lock, so no need for atomic update */
+    page->count_info &= ~PGC_SH_count_mask;
+    page->count_info |= nx;
+
+    /* We remember the first shadow entry that points to each shadow. */
+    if ( entry_pa != 0 && page->up == 0 )
+        page->up = entry_pa;
+}
+
+
+/* Decrease the refcount of a shadow page. As for get_ref, takes the
+ * physical address of the shadow entry that held this reference. */
+static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
+{
+    u32 x, nx;
+    struct page_info *page = mfn_to_page(smfn);
+
+    ASSERT(mfn_valid(smfn));
+    /* Shadow pages are never owned by a domain. */
+    ASSERT(page_get_owner(page) == NULL);
+
+    /* If this is the entry in the up-pointer, remove it */
+    if ( entry_pa != 0 && page->up == entry_pa )
+        page->up = 0;
+
+    x = page->count_info & PGC_SH_count_mask;
+    nx = x - 1;
+
+    /* Dropping a ref we never held is unrecoverable: kill the domain. */
+    if ( unlikely(x == 0) )
+    {
+        SHADOW_PRINTK("shadow ref underflow, smfn=%lx oc=%08x t=%"
+                       PRtype_info "\n",
+                       mfn_x(smfn),
+                       page->count_info & PGC_SH_count_mask,
+                       page->u.inuse.type_info);
+        domain_crash_synchronous();
+    }
+
+    /* Guarded by the shadow lock, so no need for atomic update */
+    page->count_info &= ~PGC_SH_count_mask;
+    page->count_info |= nx;
+
+    /* Last reference gone: free the shadow page itself. */
+    if ( unlikely(nx == 0) )
+        sh_destroy_shadow(v, smfn);
+}
+
+
+/* Pin a shadow page: take an extra refcount and set the pin bit. */
+static inline void sh_pin(mfn_t smfn)
+{
+    struct page_info *page;
+
+    ASSERT(mfn_valid(smfn));
+    page = mfn_to_page(smfn);
+    /* Idempotent: an already-pinned page keeps its single pin ref. */
+    if ( !(page->count_info & PGC_SH_pinned) )
+    {
+        sh_get_ref(smfn, 0);
+        page->count_info |= PGC_SH_pinned;
+    }
+}
+
+/* Unpin a shadow page: unset the pin bit and release the extra ref. */
+static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
+{
+    struct page_info *page;
+
+    ASSERT(mfn_valid(smfn));
+    page = mfn_to_page(smfn);
+    /* Idempotent: unpinning a non-pinned page is a no-op.  Dropping the
+     * pin ref may destroy the shadow if it was the last reference. */
+    if ( page->count_info & PGC_SH_pinned )
+    {
+        page->count_info &= ~PGC_SH_pinned;
+        sh_put_ref(v, smfn, 0);
+    }
+}
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Read our own P2M table, checking in the linear pagetables first to be
+ * sure that we will succeed. Call this function if you expect it to
+ * fail often, as it avoids page faults. If you expect to succeed, use
+ * vcpu_gfn_to_mfn, which copy_from_user()s the entry */
+static inline mfn_t
+vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn)
+{
+    unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn];
+#if CONFIG_PAGING_LEVELS >= 4
+    l4_pgentry_t *l4e;
+    l3_pgentry_t *l3e;
+#endif
+    l2_pgentry_t *l2e;
+    l1_pgentry_t *l1e;
+
+    /* The linear-table walk below is only valid on the running vcpu. */
+    ASSERT(current == v);
+    /* Non-translated guests: gfn and mfn are the same number. */
+    if ( !shadow_vcpu_mode_translate(v) )
+        return _mfn(gfn);
+
+#if CONFIG_PAGING_LEVELS > 2
+    if ( gfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) )
+        /* This pfn is higher than the p2m map can hold */
+        return _mfn(INVALID_MFN);
+#endif
+
+    /* Walk the linear pagetables.  Note that this is *not* the same as
+     * the walk in sh_gfn_to_mfn_foreign, which is walking the p2m map.
+     * Each level is checked for presence so that reading the p2m entry
+     * at the end cannot fault. */
+#if CONFIG_PAGING_LEVELS >= 4
+    l4e = __linear_l4_table + l4_linear_offset(entry_addr);
+    if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+    l3e = __linear_l3_table + l3_linear_offset(entry_addr);
+    if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+#endif
+    l2e = __linear_l2_table + l2_linear_offset(entry_addr);
+    if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+    l1e = __linear_l1_table + l1_linear_offset(entry_addr);
+    if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+
+    /* Safe to look at this part of the table */
+    if ( l1e_get_flags(phys_to_machine_mapping[gfn]) & _PAGE_PRESENT )
+        return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn]));
+
+    return _mfn(INVALID_MFN);
+}
+
+
+#endif /* _XEN_SHADOW_PRIVATE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+/******************************************************************************
+ * arch/x86/mm/shadow/types.h
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW_TYPES_H
+#define _XEN_SHADOW_TYPES_H
+
+// Map a shadow page
+/* Map a shadow page for temporary access; currently just a thin
+ * wrapper over sh_map_domain_page(). */
+static inline void *
+map_shadow_page(mfn_t smfn)
+{
+    // XXX -- Possible optimization/measurement question for 32-bit and PAE
+    //        hypervisors:
+    //        How often is this smfn already available in the shadow linear
+    //        table?  Might it be worth checking that table first,
+    //        presumably using the reverse map hint in the page_info of this
+    //        smfn, rather than calling map_domain_page()?
+    //
+    return sh_map_domain_page(smfn);
+}
+
+// matching unmap for map_shadow_page()
+static inline void
+unmap_shadow_page(void *p)
+{
+    sh_unmap_domain_page(p);
+}
+
+/*
+ * Define various types for handling pagetables, based on these options:
+ * SHADOW_PAGING_LEVELS : Number of levels of shadow pagetables
+ * GUEST_PAGING_LEVELS : Number of levels of guest pagetables
+ */
+
+#if (CONFIG_PAGING_LEVELS < SHADOW_PAGING_LEVELS)
+#error Cannot have more levels of shadow pagetables than host pagetables
+#endif
+
+#if (SHADOW_PAGING_LEVELS < GUEST_PAGING_LEVELS)
+#error Cannot have more levels of guest pagetables than shadow pagetables
+#endif
+
+#if SHADOW_PAGING_LEVELS == 2
+#define SHADOW_L1_PAGETABLE_ENTRIES 1024
+#define SHADOW_L2_PAGETABLE_ENTRIES 1024
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 22
+#endif
+
+#if SHADOW_PAGING_LEVELS == 3
+#define SHADOW_L1_PAGETABLE_ENTRIES 512
+#define SHADOW_L2_PAGETABLE_ENTRIES 512
+#define SHADOW_L3_PAGETABLE_ENTRIES 4
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 21
+#define SHADOW_L3_PAGETABLE_SHIFT 30
+#endif
+
+#if SHADOW_PAGING_LEVELS == 4
+#define SHADOW_L1_PAGETABLE_ENTRIES 512
+#define SHADOW_L2_PAGETABLE_ENTRIES 512
+#define SHADOW_L3_PAGETABLE_ENTRIES 512
+#define SHADOW_L4_PAGETABLE_ENTRIES 512
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 21
+#define SHADOW_L3_PAGETABLE_SHIFT 30
+#define SHADOW_L4_PAGETABLE_SHIFT 39
+#endif
+
+/* Types of the shadow page tables */
+typedef l1_pgentry_t shadow_l1e_t;
+typedef l2_pgentry_t shadow_l2e_t;
+#if SHADOW_PAGING_LEVELS >= 3
+typedef l3_pgentry_t shadow_l3e_t;
+#if SHADOW_PAGING_LEVELS >= 4
+typedef l4_pgentry_t shadow_l4e_t;
+#endif
+#endif
+
+/* Access functions for them */
+static inline paddr_t shadow_l1e_get_paddr(shadow_l1e_t sl1e)
+{ return l1e_get_paddr(sl1e); }
+static inline paddr_t shadow_l2e_get_paddr(shadow_l2e_t sl2e)
+{ return l2e_get_paddr(sl2e); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline paddr_t shadow_l3e_get_paddr(shadow_l3e_t sl3e)
+{ return l3e_get_paddr(sl3e); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline paddr_t shadow_l4e_get_paddr(shadow_l4e_t sl4e)
+{ return l4e_get_paddr(sl4e); }
+#endif
+#endif
+
+static inline mfn_t shadow_l1e_get_mfn(shadow_l1e_t sl1e)
+{ return _mfn(l1e_get_pfn(sl1e)); }
+static inline mfn_t shadow_l2e_get_mfn(shadow_l2e_t sl2e)
+{ return _mfn(l2e_get_pfn(sl2e)); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline mfn_t shadow_l3e_get_mfn(shadow_l3e_t sl3e)
+{ return _mfn(l3e_get_pfn(sl3e)); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline mfn_t shadow_l4e_get_mfn(shadow_l4e_t sl4e)
+{ return _mfn(l4e_get_pfn(sl4e)); }
+#endif
+#endif
+
+static inline u32 shadow_l1e_get_flags(shadow_l1e_t sl1e)
+{ return l1e_get_flags(sl1e); }
+static inline u32 shadow_l2e_get_flags(shadow_l2e_t sl2e)
+{ return l2e_get_flags(sl2e); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline u32 shadow_l3e_get_flags(shadow_l3e_t sl3e)
+{ return l3e_get_flags(sl3e); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline u32 shadow_l4e_get_flags(shadow_l4e_t sl4e)
+{ return l4e_get_flags(sl4e); }
+#endif
+#endif
+
+static inline shadow_l1e_t
+shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags)
+{ l1e_remove_flags(sl1e, flags); return sl1e; }
+
+static inline shadow_l1e_t shadow_l1e_empty(void)
+{ return l1e_empty(); }
+static inline shadow_l2e_t shadow_l2e_empty(void)
+{ return l2e_empty(); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline shadow_l3e_t shadow_l3e_empty(void)
+{ return l3e_empty(); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline shadow_l4e_t shadow_l4e_empty(void)
+{ return l4e_empty(); }
+#endif
+#endif
+
+static inline shadow_l1e_t shadow_l1e_from_mfn(mfn_t mfn, u32 flags)
+{ return l1e_from_pfn(mfn_x(mfn), flags); }
+static inline shadow_l2e_t shadow_l2e_from_mfn(mfn_t mfn, u32 flags)
+{ return l2e_from_pfn(mfn_x(mfn), flags); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline shadow_l3e_t shadow_l3e_from_mfn(mfn_t mfn, u32 flags)
+{ return l3e_from_pfn(mfn_x(mfn), flags); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
+{ return l4e_from_pfn(mfn_x(mfn), flags); }
+#endif
+#endif
+
+#define shadow_l1_table_offset(a) l1_table_offset(a)
+#define shadow_l2_table_offset(a) l2_table_offset(a)
+#define shadow_l3_table_offset(a) l3_table_offset(a)
+#define shadow_l4_table_offset(a) l4_table_offset(a)
+
+/**************************************************************************/
+/* Access to the linear mapping of shadow page tables. */
+
+/* Offsets into each level of the linear mapping for a virtual address. */
+#define shadow_l1_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L1_PAGETABLE_SHIFT)
+#define shadow_l2_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L2_PAGETABLE_SHIFT)
+#define shadow_l3_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L3_PAGETABLE_SHIFT)
+#define shadow_l4_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L4_PAGETABLE_SHIFT)
+
+/* Where to find each level of the linear mapping. For PV guests, we use
+ * the shadow linear-map self-entry as many times as we need. For HVM
+ * guests, the shadow doesn't have a linear-map self-entry so we must use
+ * the monitor-table's linear-map entry N-1 times and then the shadow-map
+ * entry once. */
+#define __sh_linear_l1_table ((shadow_l1e_t *)(SH_LINEAR_PT_VIRT_START))
+#define __sh_linear_l2_table ((shadow_l2e_t *) \
+ (__sh_linear_l1_table + shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)))
+
+// shadow linear L3 and L4 tables only exist in 4 level paging...
+#if SHADOW_PAGING_LEVELS == 4
+#define __sh_linear_l3_table ((shadow_l3e_t *) \
+ (__sh_linear_l2_table + shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)))
+#define __sh_linear_l4_table ((shadow_l4e_t *) \
+ (__sh_linear_l3_table + shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START)))
+#endif
+
+#define sh_linear_l1_table(v) ({ \
+ ASSERT(current == (v)); \
+ __sh_linear_l1_table; \
+})
+
+#define sh_linear_l2_table(v) ({ \
+ ASSERT(current == (v)); \
+ ((shadow_l2e_t *) \
+ (hvm_guest(v) ? __linear_l1_table : __sh_linear_l1_table) + \
+ shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
+})
+
+// shadow linear L3 and L4 tables only exist in 4 level paging...
+#if SHADOW_PAGING_LEVELS == 4
+#define sh_linear_l3_table(v) ({ \
+ ASSERT(current == (v)); \
+ ((shadow_l3e_t *) \
+ (hvm_guest(v) ? __linear_l2_table : __sh_linear_l2_table) + \
+ shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)); \
+})
+
+// we use l4_pgentry_t instead of shadow_l4e_t below because shadow_l4e_t is
+// not defined for when xen_levels==4 & shadow_levels==3...
+#define sh_linear_l4_table(v) ({ \
+ ASSERT(current == (v)); \
+ ((l4_pgentry_t *) \
+ (hvm_guest(v) ? __linear_l3_table : __sh_linear_l3_table) + \
+ shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START)); \
+})
+#endif
+
+#if GUEST_PAGING_LEVELS == 2
+
+#include "page-guest32.h"
+
+#define GUEST_L1_PAGETABLE_ENTRIES 1024
+#define GUEST_L2_PAGETABLE_ENTRIES 1024
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 22
+
+/* Type of the guest's frame numbers */
+TYPE_SAFE(u32,gfn)
+#define INVALID_GFN ((u32)(-1u))
+#define SH_PRI_gfn "05x"
+
+/* Types of the guest's page tables */
+typedef l1_pgentry_32_t guest_l1e_t;
+typedef l2_pgentry_32_t guest_l2e_t;
+
+/* Access functions for them */
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return l1e_get_paddr_32(gl1e); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return l2e_get_paddr_32(gl2e); }
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(l1e_get_paddr_32(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(l2e_get_paddr_32(gl2e) >> PAGE_SHIFT); }
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return l1e_get_flags_32(gl1e); }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return l2e_get_flags_32(gl2e); }
+
+static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
+{ l1e_add_flags_32(gl1e, flags); return gl1e; }
+static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
+{ l2e_add_flags_32(gl2e, flags); return gl2e; }
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return l1e_from_pfn_32(gfn_x(gfn), flags); }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return l2e_from_pfn_32(gfn_x(gfn), flags); }
+
+#define guest_l1_table_offset(a) l1_table_offset_32(a)
+#define guest_l2_table_offset(a) l2_table_offset_32(a)
+
+/* The shadow types needed for the various levels. */
+#define PGC_SH_l1_shadow PGC_SH_l1_32_shadow
+#define PGC_SH_l2_shadow PGC_SH_l2_32_shadow
+#define PGC_SH_fl1_shadow PGC_SH_fl1_32_shadow
+
+#else /* GUEST_PAGING_LEVELS != 2 */
+
+#if GUEST_PAGING_LEVELS == 3
+#define GUEST_L1_PAGETABLE_ENTRIES 512
+#define GUEST_L2_PAGETABLE_ENTRIES 512
+#define GUEST_L3_PAGETABLE_ENTRIES 4
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 21
+#define GUEST_L3_PAGETABLE_SHIFT 30
+#else /* GUEST_PAGING_LEVELS == 4 */
+#define GUEST_L1_PAGETABLE_ENTRIES 512
+#define GUEST_L2_PAGETABLE_ENTRIES 512
+#define GUEST_L3_PAGETABLE_ENTRIES 512
+#define GUEST_L4_PAGETABLE_ENTRIES 512
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 21
+#define GUEST_L3_PAGETABLE_SHIFT 30
+#define GUEST_L4_PAGETABLE_SHIFT 39
+#endif
+
+/* Type of the guest's frame numbers */
+TYPE_SAFE(unsigned long,gfn)
+#define INVALID_GFN ((unsigned long)(-1ul))
+#define SH_PRI_gfn "05lx"
+
+/* Types of the guest's page tables */
+typedef l1_pgentry_t guest_l1e_t;
+typedef l2_pgentry_t guest_l2e_t;
+typedef l3_pgentry_t guest_l3e_t;
+#if GUEST_PAGING_LEVELS >= 4
+typedef l4_pgentry_t guest_l4e_t;
+#endif
+
+/* Access functions for them */
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return l1e_get_paddr(gl1e); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return l2e_get_paddr(gl2e); }
+static inline paddr_t guest_l3e_get_paddr(guest_l3e_t gl3e)
+{ return l3e_get_paddr(gl3e); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline paddr_t guest_l4e_get_paddr(guest_l4e_t gl4e)
+{ return l4e_get_paddr(gl4e); }
+#endif
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e)
+{ return _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e)
+{ return _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT); }
+#endif
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return l1e_get_flags(gl1e); }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return l2e_get_flags(gl2e); }
+static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e)
+{ return l3e_get_flags(gl3e); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
+{ return l4e_get_flags(gl4e); }
+#endif
+
+static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
+{ l1e_add_flags(gl1e, flags); return gl1e; }
+static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
+{ l2e_add_flags(gl2e, flags); return gl2e; }
+static inline guest_l3e_t guest_l3e_add_flags(guest_l3e_t gl3e, u32 flags)
+{ l3e_add_flags(gl3e, flags); return gl3e; }
+#if GUEST_PAGING_LEVELS >= 4
+static inline guest_l4e_t guest_l4e_add_flags(guest_l4e_t gl4e, u32 flags)
+{ l4e_add_flags(gl4e, flags); return gl4e; }
+#endif
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return l1e_from_pfn(gfn_x(gfn), flags); }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return l2e_from_pfn(gfn_x(gfn), flags); }
+static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags)
+{ return l3e_from_pfn(gfn_x(gfn), flags); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
+{ return l4e_from_pfn(gfn_x(gfn), flags); }
+#endif
+
+#define guest_l1_table_offset(a) l1_table_offset(a)
+#define guest_l2_table_offset(a) l2_table_offset(a)
+#define guest_l3_table_offset(a) l3_table_offset(a)
+#define guest_l4_table_offset(a) l4_table_offset(a)
+
+/* The shadow types needed for the various levels. */
+#if GUEST_PAGING_LEVELS == 3
+#define PGC_SH_l1_shadow PGC_SH_l1_pae_shadow
+#define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
+#define PGC_SH_l2_shadow PGC_SH_l2_pae_shadow
+#define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
+#define PGC_SH_l3_shadow PGC_SH_l3_pae_shadow
+#else
+#define PGC_SH_l1_shadow PGC_SH_l1_64_shadow
+#define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
+#define PGC_SH_l2_shadow PGC_SH_l2_64_shadow
+#define PGC_SH_l3_shadow PGC_SH_l3_64_shadow
+#define PGC_SH_l4_shadow PGC_SH_l4_64_shadow
+#endif
+
+#endif /* GUEST_PAGING_LEVELS != 2 */
+
+#define VALID_GFN(m) (m != INVALID_GFN)
+
+static inline int
+valid_gfn(gfn_t m)
+{
+ return VALID_GFN(gfn_x(m));
+}
+
+#if GUEST_PAGING_LEVELS == 2
+#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
+#elif GUEST_PAGING_LEVELS == 3
+#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
+#else
+#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
+#endif
+
+/* Translation between mfns and gfns */
+static inline mfn_t
+vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
+{
+ return sh_vcpu_gfn_to_mfn(v, gfn_x(gfn));
+}
+
+static inline gfn_t
+mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+ return _gfn(sh_mfn_to_gfn(d, mfn));
+}
+
+static inline paddr_t
+gfn_to_paddr(gfn_t gfn)
+{
+ return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT;
+}
+
+/* Type used for recording a walk through guest pagetables. It is
+ * filled in by the pagetable walk function, and also used as a cache
+ * for later walks.
+ * Any non-null pointer in this structure represents a mapping of guest
+ * memory. We must always call walk_init() before using a walk_t, and
+ * call walk_unmap() when we're done.
+ * The "Effective l1e" field is used when there isn't an l1e to point to,
+ * but we have fabricated an l1e for propagation to the shadow (e.g.,
+ * for splintering guest superpages into many shadow l1 entries). */
+typedef struct shadow_walk_t walk_t;
+struct shadow_walk_t
+{
+ unsigned long va; /* Address we were looking for */
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+ guest_l4e_t *l4e; /* Pointer to guest's level 4 entry */
+#endif
+ guest_l3e_t *l3e; /* Pointer to guest's level 3 entry */
+#endif
+ guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */
+ guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */
+ guest_l1e_t eff_l1e; /* Effective level 1 entry */
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+ mfn_t l4mfn; /* MFN that the level 4 entry is in */
+#endif
+ mfn_t l3mfn; /* MFN that the level 3 entry is in */
+#endif
+ mfn_t l2mfn; /* MFN that the level 2 entry is in */
+ mfn_t l1mfn; /* MFN that the level 1 entry is in */
+};
+
+/* macros for dealing with the naming of the internal function names of the
+ * shadow code's external entry points.
+ */
+#define INTERNAL_NAME(name) \
+ SHADOW_INTERNAL_NAME(name, SHADOW_PAGING_LEVELS, GUEST_PAGING_LEVELS)
+
+/* macros for renaming the primary entry points, so that they are more
+ * easily distinguished from a debugger
+ */
+#define sh_page_fault INTERNAL_NAME(sh_page_fault)
+#define sh_invlpg INTERNAL_NAME(sh_invlpg)
+#define sh_gva_to_gpa INTERNAL_NAME(sh_gva_to_gpa)
+#define sh_gva_to_gfn INTERNAL_NAME(sh_gva_to_gfn)
+#define sh_update_cr3 INTERNAL_NAME(sh_update_cr3)
+#define sh_remove_write_access INTERNAL_NAME(sh_remove_write_access)
+#define sh_remove_all_mappings INTERNAL_NAME(sh_remove_all_mappings)
+#define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow)
+#define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow)
+#define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow)
+#define sh_map_and_validate_gl4e INTERNAL_NAME(sh_map_and_validate_gl4e)
+#define sh_map_and_validate_gl3e INTERNAL_NAME(sh_map_and_validate_gl3e)
+#define sh_map_and_validate_gl2e INTERNAL_NAME(sh_map_and_validate_gl2e)
+#define sh_map_and_validate_gl2he INTERNAL_NAME(sh_map_and_validate_gl2he)
+#define sh_map_and_validate_gl1e INTERNAL_NAME(sh_map_and_validate_gl1e)
+#define sh_destroy_l4_shadow INTERNAL_NAME(sh_destroy_l4_shadow)
+#define sh_destroy_l3_shadow INTERNAL_NAME(sh_destroy_l3_shadow)
+#define sh_destroy_l3_subshadow INTERNAL_NAME(sh_destroy_l3_subshadow)
+#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
+#define sh_destroy_l2_shadow INTERNAL_NAME(sh_destroy_l2_shadow)
+#define sh_destroy_l1_shadow INTERNAL_NAME(sh_destroy_l1_shadow)
+#define sh_unhook_32b_mappings INTERNAL_NAME(sh_unhook_32b_mappings)
+#define sh_unhook_pae_mappings INTERNAL_NAME(sh_unhook_pae_mappings)
+#define sh_unhook_64b_mappings INTERNAL_NAME(sh_unhook_64b_mappings)
+#define sh_paging_mode INTERNAL_NAME(sh_paging_mode)
+#define sh_detach_old_tables INTERNAL_NAME(sh_detach_old_tables)
+#define sh_x86_emulate_write INTERNAL_NAME(sh_x86_emulate_write)
+#define sh_x86_emulate_cmpxchg INTERNAL_NAME(sh_x86_emulate_cmpxchg)
+#define sh_x86_emulate_cmpxchg8b INTERNAL_NAME(sh_x86_emulate_cmpxchg8b)
+#define sh_audit_l1_table INTERNAL_NAME(sh_audit_l1_table)
+#define sh_audit_fl1_table INTERNAL_NAME(sh_audit_fl1_table)
+#define sh_audit_l2_table INTERNAL_NAME(sh_audit_l2_table)
+#define sh_audit_l3_table INTERNAL_NAME(sh_audit_l3_table)
+#define sh_audit_l4_table INTERNAL_NAME(sh_audit_l4_table)
+#define sh_guess_wrmap INTERNAL_NAME(sh_guess_wrmap)
+#define sh_clear_shadow_entry INTERNAL_NAME(sh_clear_shadow_entry)
+
+/* sh_make_monitor_table only depends on the number of shadow levels */
+#define sh_make_monitor_table \
+ SHADOW_INTERNAL_NAME(sh_make_monitor_table, \
+ SHADOW_PAGING_LEVELS, \
+ SHADOW_PAGING_LEVELS)
+#define sh_destroy_monitor_table \
+ SHADOW_INTERNAL_NAME(sh_destroy_monitor_table, \
+ SHADOW_PAGING_LEVELS, \
+ SHADOW_PAGING_LEVELS)
+
+
+#if GUEST_PAGING_LEVELS == 3
+/*
+ * Accounting information stored in the shadow of PAE Guest L3 pages.
+ * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
+ * various refcounts, etc., on the page_info of their page. We provide extra
+ * bookkeeping space in the shadow itself, and this is the structure
+ * definition for that bookkeeping information.
+ */
+struct pae_l3_bookkeeping {
+ u32 vcpus; /* bitmap of which vcpus are currently storing
+ * copies of this 32-byte page */
+ u32 refcount; /* refcount for this 32-byte page */
+ u8 pinned; /* is this 32-byte page pinned or not? */
+};
+
+// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
+#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *) \
+ (((unsigned long)(_ptr) & ~31) + 32))
+
+static void sh_destroy_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e);
+
+/* Increment a subshadow ref
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. */
+static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+ /* First ref to the subshadow takes a ref to the full shadow */
+ if ( bk->refcount == 0 )
+ sh_get_ref(smfn, 0);
+ if ( unlikely(++(bk->refcount) == 0) )
+ {
+ SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n",
+ mfn_x(smfn), sl3e);
+ domain_crash_synchronous();
+ }
+}
+
+/* Decrement a subshadow ref.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. Calling this may cause the
+ * entire shadow to disappear, so the caller must immediately unmap
+ * the pointer after calling. */
+static inline void sh_put_ref_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e,
+ mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk;
+
+ bk = sl3p_to_info(sl3e);
+
+ ASSERT(bk->refcount > 0);
+ if ( --(bk->refcount) == 0 )
+ {
+ /* Need to destroy this subshadow */
+ sh_destroy_l3_subshadow(v, sl3e);
+ /* Last ref to the subshadow had a ref to the full shadow */
+ sh_put_ref(v, smfn, 0);
+ }
+}
+
+/* Pin a subshadow
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. */
+static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+#if 0
+ debugtrace_printk("%s smfn=%05lx offset=%ld\n",
+ __func__, mfn_x(smfn),
+ ((unsigned long)sl3e & ~PAGE_MASK) / 64);
+#endif
+
+ if ( !bk->pinned )
+ {
+ bk->pinned = 1;
+ sh_get_ref_l3_subshadow(sl3e, smfn);
+ }
+}
+
+/* Unpin a sub-shadow.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. Calling this may cause the
+ * entire shadow to disappear, so the caller must immediately unmap
+ * the pointer after calling. */
+static inline void sh_unpin_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e,
+ mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+#if 0
+ debugtrace_printk("%s smfn=%05lx offset=%ld\n",
+ __func__, mfn_x(smfn),
+ ((unsigned long)sl3e & ~PAGE_MASK) / 64);
+#endif
+
+ if ( bk->pinned )
+ {
+ bk->pinned = 0;
+ sh_put_ref_l3_subshadow(v, sl3e, smfn);
+ }
+}
+
+#endif /* GUEST_PAGING_LEVELS == 3 */
+
+#if SHADOW_PAGING_LEVELS == 3
+#define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
+#endif
+
+#if SHADOW_PAGING_LEVELS == 2
+#define SH_PRI_pte "08x"
+#else /* SHADOW_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define SH_PRI_pte "016llx"
+#else
+#define SH_PRI_pte "016lx"
+#endif
+#endif /* SHADOW_PAGING_LEVELS >= 3 */
+
+#if GUEST_PAGING_LEVELS == 2
+#define SH_PRI_gpte "08x"
+#else /* GUEST_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define SH_PRI_gpte "016llx"
+#else
+#define SH_PRI_gpte "016lx"
+#endif
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+static inline u32
+accumulate_guest_flags(walk_t *gw)
+{
+ u32 accumulated_flags;
+
+ // We accumulate the permission flags with bitwise ANDing.
+ // This works for the PRESENT bit, RW bit, and USER bit.
+ // For the NX bit, however, the polarity is wrong, so we accumulate the
+ // inverse of the NX bit.
+ //
+ accumulated_flags = guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
+
+ // Note that PAE guests do not have USER or RW or NX bits in their L3s.
+ //
+#if GUEST_PAGING_LEVELS == 3
+ accumulated_flags &=
+ ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
+#elif GUEST_PAGING_LEVELS >= 4
+ accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
+#endif
+
+ // Finally, revert the NX bit back to its original polarity
+ accumulated_flags ^= _PAGE_NX_BIT;
+
+ return accumulated_flags;
+}
+
+#endif /* _XEN_SHADOW_TYPES_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+++ /dev/null
-/******************************************************************************
- * arch/x86/shadow2-common.c
- *
- * Shadow2 code that does not need to be multiply compiled.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
- * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
- * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#define SHADOW2 1
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/trace.h>
-#include <xen/sched.h>
-#include <xen/perfc.h>
-#include <xen/irq.h>
-#include <xen/domain_page.h>
-#include <xen/guest_access.h>
-#include <xen/keyhandler.h>
-#include <asm/event.h>
-#include <asm/page.h>
-#include <asm/current.h>
-#include <asm/flushtlb.h>
-#include <asm/shadow2.h>
-#include <asm/shadow2-private.h>
-
-#if SHADOW2_AUDIT
-int shadow2_audit_enable = 0;
-
-static void shadow2_audit_key(unsigned char key)
-{
- shadow2_audit_enable = !shadow2_audit_enable;
- printk("%s shadow2_audit_enable=%d\n",
- __func__, shadow2_audit_enable);
-}
-
-static int __init shadow2_audit_key_init(void)
-{
- register_keyhandler(
- 'O', shadow2_audit_key, "toggle shadow2 audits");
- return 0;
-}
-__initcall(shadow2_audit_key_init);
-#endif /* SHADOW2_AUDIT */
-
-static void sh2_free_log_dirty_bitmap(struct domain *d);
-
-int _shadow2_mode_refcounts(struct domain *d)
-{
- return shadow2_mode_refcounts(d);
-}
-
-
-/**************************************************************************/
-/* x86 emulator support for the shadow2 code
- */
-
-static int
-sh2_x86_emulate_read_std(unsigned long addr,
- unsigned long *val,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt)
-{
- struct vcpu *v = current;
- if ( hvm_guest(v) )
- {
- *val = 0;
- // XXX -- this is WRONG.
- // It entirely ignores the permissions in the page tables.
- // In this case, that is only a user vs supervisor access check.
- //
- if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
- {
-#if 0
- SHADOW2_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
- v->domain->domain_id, v->vcpu_id,
- addr, *val, bytes);
-#endif
- return X86EMUL_CONTINUE;
- }
-
- /* If we got here, there was nothing mapped here, or a bad GFN
- * was mapped here. This should never happen: we're here because
- * of a write fault at the end of the instruction we're emulating. */
- SHADOW2_PRINTK("read failed to va %#lx\n", addr);
- return X86EMUL_PROPAGATE_FAULT;
- }
- else
- {
- SHADOW2_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
-}
-
-static int
-sh2_x86_emulate_write_std(unsigned long addr,
- unsigned long val,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt)
-{
- struct vcpu *v = current;
-#if 0
- SHADOW2_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
- v->domain->domain_id, v->vcpu_id, addr, val, bytes);
-#endif
- if ( hvm_guest(v) )
- {
- // XXX -- this is WRONG.
- // It entirely ignores the permissions in the page tables.
- // In this case, that includes user vs supervisor, and
- // write access.
- //
- if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
- return X86EMUL_CONTINUE;
-
- /* If we got here, there was nothing mapped here, or a bad GFN
- * was mapped here. This should never happen: we're here because
- * of a write fault at the end of the instruction we're emulating,
- * which should be handled by sh2_x86_emulate_write_emulated. */
- SHADOW2_PRINTK("write failed to va %#lx\n", addr);
- return X86EMUL_PROPAGATE_FAULT;
- }
- else
- {
- SHADOW2_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
-}
-
-static int
-sh2_x86_emulate_write_emulated(unsigned long addr,
- unsigned long val,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt)
-{
- struct vcpu *v = current;
-#if 0
- SHADOW2_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
- v->domain->domain_id, v->vcpu_id, addr, val, bytes);
-#endif
- if ( hvm_guest(v) )
- {
- return v->arch.shadow2.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
- }
- else
- {
- SHADOW2_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
-}
-
-static int
-sh2_x86_emulate_cmpxchg_emulated(unsigned long addr,
- unsigned long old,
- unsigned long new,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt)
-{
- struct vcpu *v = current;
-#if 0
- SHADOW2_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
- v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
-#endif
- if ( hvm_guest(v) )
- {
- return v->arch.shadow2.mode->x86_emulate_cmpxchg(v, addr, old, new,
- bytes, ctxt);
- }
- else
- {
- SHADOW2_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
-}
-
-static int
-sh2_x86_emulate_cmpxchg8b_emulated(unsigned long addr,
- unsigned long old_lo,
- unsigned long old_hi,
- unsigned long new_lo,
- unsigned long new_hi,
- struct x86_emulate_ctxt *ctxt)
-{
- struct vcpu *v = current;
-#if 0
- SHADOW2_PRINTK("d=%u v=%u a=%#lx o?=%#lx:%lx n:=%#lx:%lx\n",
- v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
- new_hi, new_lo, ctxt);
-#endif
- if ( hvm_guest(v) )
- {
- return v->arch.shadow2.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
- new_lo, new_hi, ctxt);
- }
- else
- {
- SHADOW2_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
-}
-
-
-struct x86_emulate_ops shadow2_emulator_ops = {
- .read_std = sh2_x86_emulate_read_std,
- .write_std = sh2_x86_emulate_write_std,
- .read_emulated = sh2_x86_emulate_read_std,
- .write_emulated = sh2_x86_emulate_write_emulated,
- .cmpxchg_emulated = sh2_x86_emulate_cmpxchg_emulated,
- .cmpxchg8b_emulated = sh2_x86_emulate_cmpxchg8b_emulated,
-};
-
-
-/**************************************************************************/
-/* Code for "promoting" a guest page to the point where the shadow code is
- * willing to let it be treated as a guest page table. This generally
- * involves making sure there are no writable mappings available to the guest
- * for this page.
- */
-void shadow2_promote(struct vcpu *v, mfn_t gmfn, u32 type)
-{
- struct page_info *page = mfn_to_page(gmfn);
- unsigned long type_info;
-
- ASSERT(valid_mfn(gmfn));
-
- /* We should never try to promote a gmfn that has writeable mappings */
- ASSERT(shadow2_remove_write_access(v, gmfn, 0, 0) == 0);
-
- // Is the page already shadowed?
- if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
- {
- // No prior shadow exists...
-
- // Grab a type-ref. We don't really care if we are racing with another
- // vcpu or not, or even what kind of type we get; we just want the type
- // count to be > 0.
- //
- do {
- type_info =
- page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask);
- } while ( !get_page_type(page, type_info) );
-
- // Now that the type ref is non-zero, we can safely use the
- // shadow2_flags.
- //
- page->shadow2_flags = 0;
- }
-
- ASSERT(!test_bit(type >> PGC_SH2_type_shift, &page->shadow2_flags));
- set_bit(type >> PGC_SH2_type_shift, &page->shadow2_flags);
-}
-
-void shadow2_demote(struct vcpu *v, mfn_t gmfn, u32 type)
-{
- struct page_info *page = mfn_to_page(gmfn);
-
- ASSERT(test_bit(_PGC_page_table, &page->count_info));
- ASSERT(test_bit(type >> PGC_SH2_type_shift, &page->shadow2_flags));
-
- clear_bit(type >> PGC_SH2_type_shift, &page->shadow2_flags);
-
- if ( (page->shadow2_flags & SH2F_page_type_mask) == 0 )
- {
- // release the extra type ref
- put_page_type(page);
-
- // clear the is-a-page-table bit.
- clear_bit(_PGC_page_table, &page->count_info);
- }
-}
-
-/**************************************************************************/
-/* Validate a pagetable change from the guest and update the shadows.
- * Returns a bitmask of SHADOW2_SET_* flags. */
-
-static int
-__shadow2_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size)
-{
- int result = 0;
- struct page_info *page = mfn_to_page(gmfn);
-
- sh2_mark_dirty(v->domain, gmfn);
-
- // Determine which types of shadows are affected, and update each.
- //
- // Always validate L1s before L2s to prevent another cpu with a linear
- // mapping of this gmfn from seeing a walk that results from
- // using the new L2 value and the old L1 value. (It is OK for such a
- // guest to see a walk that uses the old L2 value with the new L1 value,
- // as hardware could behave this way if one level of the pagewalk occurs
- // before the store, and the next level of the pagewalk occurs after the
- // store.
- //
- // Ditto for L2s before L3s, etc.
- //
-
- if ( !(page->count_info & PGC_page_table) )
- return 0; /* Not shadowed at all */
-
-#if CONFIG_PAGING_LEVELS == 2
- if ( page->shadow2_flags & SH2F_L1_32 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, 2, 2)
- (v, gmfn, entry, size);
-#else
- if ( page->shadow2_flags & SH2F_L1_32 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, 3, 2)
- (v, gmfn, entry, size);
-#endif
-
-#if CONFIG_PAGING_LEVELS == 2
- if ( page->shadow2_flags & SH2F_L2_32 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, 2, 2)
- (v, gmfn, entry, size);
-#else
- if ( page->shadow2_flags & SH2F_L2_32 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, 3, 2)
- (v, gmfn, entry, size);
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
- if ( page->shadow2_flags & SH2F_L1_PAE )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, 3, 3)
- (v, gmfn, entry, size);
- if ( page->shadow2_flags & SH2F_L2_PAE )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, 3, 3)
- (v, gmfn, entry, size);
- if ( page->shadow2_flags & SH2F_L2H_PAE )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2he, 3, 3)
- (v, gmfn, entry, size);
- if ( page->shadow2_flags & SH2F_L3_PAE )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl3e, 3, 3)
- (v, gmfn, entry, size);
-#else /* 32-bit non-PAE hypervisor does not support PAE guests */
- ASSERT((page->shadow2_flags & (SH2F_L3_PAE|SH2F_L2_PAE|SH2F_L1_PAE)) == 0);
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
- if ( page->shadow2_flags & SH2F_L1_64 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, 4, 4)
- (v, gmfn, entry, size);
- if ( page->shadow2_flags & SH2F_L2_64 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, 4, 4)
- (v, gmfn, entry, size);
- if ( page->shadow2_flags & SH2F_L3_64 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl3e, 4, 4)
- (v, gmfn, entry, size);
- if ( page->shadow2_flags & SH2F_L4_64 )
- result |= SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl4e, 4, 4)
- (v, gmfn, entry, size);
-#else /* 32-bit/PAE hypervisor does not support 64-bit guests */
- ASSERT((page->shadow2_flags
- & (SH2F_L4_64|SH2F_L3_64|SH2F_L2_64|SH2F_L1_64)) == 0);
-#endif
-
- return result;
-}
-
-
-int
-shadow2_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
-/* This is the entry point from hypercalls. It returns a bitmask of all the
- * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
-{
- int rc;
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
- rc = __shadow2_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
- shadow2_audit_tables(v);
- return rc;
-}
-
-void
-shadow2_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size)
-/* This is the entry point for emulated writes to pagetables in HVM guests */
-{
- struct domain *d = v->domain;
- int rc;
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
- rc = __shadow2_validate_guest_entry(v, gmfn, entry, size);
- if ( rc & SHADOW2_SET_FLUSH )
- {
- // Flush everyone except the local processor, which will flush when it
- // re-enters the HVM guest.
- //
- cpumask_t mask = d->domain_dirty_cpumask;
- cpu_clear(v->processor, mask);
- flush_tlb_mask(mask);
- }
- if ( rc & SHADOW2_SET_ERROR )
- {
- /* This page is probably not a pagetable any more: tear it out of the
- * shadows, along with any tables that reference it */
- shadow2_remove_all_shadows_and_parents(v, gmfn);
- }
- /* We ignore the other bits: since we are about to change CR3 on
- * VMENTER we don't need to do any extra TLB flushes. */
-}
-
-
-/**************************************************************************/
-/* Memory management for shadow pages. */
-
-/* Meaning of the count_info field in shadow pages
- * ----------------------------------------------
- *
- * A count of all references to this page from other shadow pages and
- * guest CR3s (a.k.a. v->arch.shadow2.table).
- *
- * The top bits hold the shadow type and the pinned bit. Top-level
- * shadows are pinned so that they don't disappear when not in a CR3
- * somewhere.
- *
- * We don't need to use get|put_page for this as the updates are all
- * protected by the shadow lock. We can't use get|put_page for this
- * as the size of the count on shadow pages is different from that on
- * normal guest pages.
- */
-
-/* Meaning of the type_info field in shadow pages
- * ----------------------------------------------
- *
- * type_info use depends on the shadow type (from count_info)
- *
- * PGC_SH2_none : This page is in the shadow2 free pool. type_info holds
- * the chunk order for our freelist allocator.
- *
- * PGC_SH2_l*_shadow : This page is in use as a shadow. type_info
- * holds the mfn of the guest page being shadowed,
- *
- * PGC_SH2_fl1_*_shadow : This page is being used to shatter a superpage.
- * type_info holds the gfn being shattered.
- *
- * PGC_SH2_monitor_table : This page is part of a monitor table.
- * type_info is not used.
- */
-
-/* Meaning of the _domain field in shadow pages
- * --------------------------------------------
- *
- * In shadow pages, this field will always have its least significant bit
- * set. This ensures that all attempts to get_page() will fail (as all
- * valid pickled domain pointers have a zero for their least significant bit).
- * Instead, the remaining upper bits are used to record the shadow generation
- * counter when the shadow was created.
- */
-
-/* Meaning of the shadow2_flags field
- * ----------------------------------
- *
- * In guest pages that are shadowed, one bit for each kind of shadow they have.
- *
- * In shadow pages, will be used for holding a representation of the populated
- * entries in this shadow (either a min/max, or a bitmap, or ...)
- *
- * In monitor-table pages, holds the level of the particular page (to save
- * spilling the shadow types into an extra bit by having three types of monitor
- * page).
- */
-
-/* Meaning of the list_head struct in shadow pages
- * -----------------------------------------------
- *
- * In free shadow pages, this is used to hold the free-lists of chunks.
- *
- * In top-level shadow tables, this holds a linked-list of all top-level
- * shadows (used for recovering memory and destroying shadows).
- *
- * In lower-level shadows, this holds the physical address of a higher-level
- * shadow entry that holds a reference to this shadow (or zero).
- */
-
-/* Allocating shadow pages
- * -----------------------
- *
- * Most shadow pages are allocated singly, but there are two cases where we
- * need to allocate multiple pages together.
- *
- * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
- * A 32-bit guest l1 table covers 4MB of virtuial address space,
- * and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
- * of virtual address space each). Similarly, a 32-bit guest l2 table
- * (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va
- * each). These multi-page shadows are contiguous and aligned;
- * functions for handling offsets into them are defined in shadow2.c
- * (shadow_l1_index() etc.)
- *
- * 2: Shadowing PAE top-level pages. Each guest page that contains
- * any PAE top-level pages requires two shadow pages to shadow it.
- * They contain alternating l3 tables and pae_l3_bookkeeping structs.
- *
- * This table shows the allocation behaviour of the different modes:
- *
- * Xen paging 32b pae pae 64b 64b 64b
- * Guest paging 32b 32b pae 32b pae 64b
- * PV or HVM * HVM * HVM HVM *
- * Shadow paging 32b pae pae pae pae 64b
- *
- * sl1 size 4k 8k 4k 8k 4k 4k
- * sl2 size 4k 16k 4k 16k 4k 4k
- * sl3 size - - 8k - 8k 4k
- * sl4 size - - - - - 4k
- *
- * We allocate memory from xen in four-page units and break them down
- * with a simple buddy allocator. Can't use the xen allocator to handle
- * this as it only works for contiguous zones, and a domain's shadow
- * pool is made of fragments.
- *
- * In HVM guests, the p2m table is built out of shadow pages, and we provide
- * a function for the p2m management to steal pages, in max-order chunks, from
- * the free pool. We don't provide for giving them back, yet.
- */
-
-/* Figure out the least acceptable quantity of shadow memory.
- * The minimum memory requirement for always being able to free up a
- * chunk of memory is very small -- only three max-order chunks per
- * vcpu to hold the top level shadows and pages with Xen mappings in them.
- *
- * But for a guest to be guaranteed to successfully execute a single
- * instruction, we must be able to map a large number (about thirty) VAs
- * at the same time, which means that to guarantee progress, we must
- * allow for more than ninety allocated pages per vcpu. We round that
- * up to 128 pages, or half a megabyte per vcpu. */
-unsigned int shadow2_min_acceptable_pages(struct domain *d)
-{
- u32 vcpu_count = 0;
- struct vcpu *v;
-
- for_each_vcpu(d, v)
- vcpu_count++;
-
- return (vcpu_count * 128);
-}
-
-/* Using the type_info field to store freelist order */
-#define SH2_PFN_ORDER(_p) ((_p)->u.inuse.type_info)
-#define SH2_SET_PFN_ORDER(_p, _o) \
- do { (_p)->u.inuse.type_info = (_o); } while (0)
-
-
-/* Figure out the order of allocation needed for a given shadow type */
-static inline u32
-shadow_order(u32 shadow_type)
-{
-#if CONFIG_PAGING_LEVELS > 2
- static const u32 type_to_order[16] = {
- 0, /* PGC_SH2_none */
- 1, /* PGC_SH2_l1_32_shadow */
- 1, /* PGC_SH2_fl1_32_shadow */
- 2, /* PGC_SH2_l2_32_shadow */
- 0, /* PGC_SH2_l1_pae_shadow */
- 0, /* PGC_SH2_fl1_pae_shadow */
- 0, /* PGC_SH2_l2_pae_shadow */
- 0, /* PGC_SH2_l2h_pae_shadow */
- 1, /* PGC_SH2_l3_pae_shadow */
- 0, /* PGC_SH2_l1_64_shadow */
- 0, /* PGC_SH2_fl1_64_shadow */
- 0, /* PGC_SH2_l2_64_shadow */
- 0, /* PGC_SH2_l3_64_shadow */
- 0, /* PGC_SH2_l4_64_shadow */
- 2, /* PGC_SH2_p2m_table */
- 0 /* PGC_SH2_monitor_table */
- };
- u32 type = (shadow_type & PGC_SH2_type_mask) >> PGC_SH2_type_shift;
- return type_to_order[type];
-#else /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
- return 0;
-#endif
-}
-
-
-/* Do we have a free chunk of at least this order? */
-static inline int chunk_is_available(struct domain *d, int order)
-{
- int i;
-
- for ( i = order; i <= SHADOW2_MAX_ORDER; i++ )
- if ( !list_empty(&d->arch.shadow2.freelists[i]) )
- return 1;
- return 0;
-}
-
-/* Dispatcher function: call the per-mode function that will unhook the
- * non-Xen mappings in this top-level shadow mfn */
-void shadow2_unhook_mappings(struct vcpu *v, mfn_t smfn)
-{
- struct page_info *pg = mfn_to_page(smfn);
- switch ( (pg->count_info & PGC_SH2_type_mask) >> PGC_SH2_type_shift )
- {
- case PGC_SH2_l2_32_shadow >> PGC_SH2_type_shift:
-#if CONFIG_PAGING_LEVELS == 2
- SHADOW2_INTERNAL_NAME(sh2_unhook_32b_mappings,2,2)(v,smfn);
-#else
- SHADOW2_INTERNAL_NAME(sh2_unhook_32b_mappings,3,2)(v,smfn);
-#endif
- break;
-#if CONFIG_PAGING_LEVELS >= 3
- case PGC_SH2_l3_pae_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_unhook_pae_mappings,3,3)(v,smfn);
- break;
-#endif
-#if CONFIG_PAGING_LEVELS >= 4
- case PGC_SH2_l4_64_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_unhook_64b_mappings,4,4)(v,smfn);
- break;
-#endif
- default:
- SHADOW2_PRINTK("top-level shadow has bad type %08lx\n",
- (unsigned long)((pg->count_info & PGC_SH2_type_mask)
- >> PGC_SH2_type_shift));
- BUG();
- }
-}
-
-
-/* Make sure there is at least one chunk of the required order available
- * in the shadow page pool. This must be called before any calls to
- * shadow2_alloc(). Since this will free existing shadows to make room,
- * it must be called early enough to avoid freeing shadows that the
- * caller is currently working on. */
-void shadow2_prealloc(struct domain *d, unsigned int order)
-{
- /* Need a vpcu for calling unpins; for now, since we don't have
- * per-vcpu shadows, any will do */
- struct vcpu *v = d->vcpu[0];
- struct list_head *l, *t;
- struct page_info *pg;
- mfn_t smfn;
-
- if ( chunk_is_available(d, order) ) return;
-
- /* Stage one: walk the list of top-level pages, unpinning them */
- perfc_incrc(shadow2_prealloc_1);
- list_for_each_backwards_safe(l, t, &d->arch.shadow2.toplevel_shadows)
- {
- pg = list_entry(l, struct page_info, list);
- smfn = page_to_mfn(pg);
-
-#if CONFIG_PAGING_LEVELS >= 3
- if ( (pg->count_info & PGC_SH2_type_mask) == PGC_SH2_l3_pae_shadow )
- {
- /* For PAE, we need to unpin each subshadow on this shadow */
- SHADOW2_INTERNAL_NAME(sh2_unpin_all_l3_subshadows,3,3)(v, smfn);
- }
- else
-#endif /* 32-bit code always takes this branch */
- {
- /* Unpin this top-level shadow */
- sh2_unpin(v, smfn);
- }
-
- /* See if that freed up a chunk of appropriate size */
- if ( chunk_is_available(d, order) ) return;
- }
-
- /* Stage two: all shadow pages are in use in hierarchies that are
- * loaded in cr3 on some vcpu. Walk them, unhooking the non-Xen
- * mappings. */
- perfc_incrc(shadow2_prealloc_2);
- v = current;
- if ( v->domain != d )
- v = d->vcpu[0];
- /* Walk the list from the tail: recently used toplevels have been pulled
- * to the head */
- list_for_each_backwards_safe(l, t, &d->arch.shadow2.toplevel_shadows)
- {
- pg = list_entry(l, struct page_info, list);
- smfn = page_to_mfn(pg);
- shadow2_unhook_mappings(v, smfn);
-
- /* Need to flush TLB if we've altered our own tables */
- if ( !shadow2_mode_external(d)
- && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
- local_flush_tlb();
-
- /* See if that freed up a chunk of appropriate size */
- if ( chunk_is_available(d, order) ) return;
- }
-
- /* Nothing more we can do: all remaining shadows are of pages that
- * hold Xen mappings for some vcpu. This can never happen. */
- SHADOW2_PRINTK("Can't pre-allocate %i shadow pages!\n"
- " shadow pages total = %u, free = %u, p2m=%u\n",
- 1 << order,
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
- BUG();
-}
-
-
-/* Allocate another shadow's worth of (contiguous, aligned) pages,
- * and fill in the type and backpointer fields of their page_infos.
- * Never fails to allocate. */
-mfn_t shadow2_alloc(struct domain *d,
- u32 shadow_type,
- unsigned long backpointer)
-{
- struct page_info *pg = NULL;
- unsigned int order = shadow_order(shadow_type);
- cpumask_t mask;
- void *p;
- int i;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(order <= SHADOW2_MAX_ORDER);
- ASSERT(shadow_type != PGC_SH2_none);
- perfc_incrc(shadow2_alloc);
-
- /* Find smallest order which can satisfy the request. */
- for ( i = order; i <= SHADOW2_MAX_ORDER; i++ )
- if ( !list_empty(&d->arch.shadow2.freelists[i]) )
- {
- pg = list_entry(d->arch.shadow2.freelists[i].next,
- struct page_info, list);
- list_del(&pg->list);
-
- /* We may have to halve the chunk a number of times. */
- while ( i != order )
- {
- i--;
- SH2_SET_PFN_ORDER(pg, i);
- list_add_tail(&pg->list, &d->arch.shadow2.freelists[i]);
- pg += 1 << i;
- }
- d->arch.shadow2.free_pages -= 1 << order;
-
- /* Init page info fields and clear the pages */
- for ( i = 0; i < 1<<order ; i++ )
- {
- pg[i].u.inuse.type_info = backpointer;
- pg[i].count_info = shadow_type;
- pg[i].shadow2_flags = 0;
- INIT_LIST_HEAD(&pg[i].list);
- /* Before we overwrite the old contents of this page,
- * we need to be sure that no TLB holds a pointer to it. */
- mask = d->domain_dirty_cpumask;
- tlbflush_filter(mask, pg[i].tlbflush_timestamp);
- if ( unlikely(!cpus_empty(mask)) )
- {
- perfc_incrc(shadow2_alloc_tlbflush);
- flush_tlb_mask(mask);
- }
- /* Now safe to clear the page for reuse */
- p = sh2_map_domain_page(page_to_mfn(pg+i));
- ASSERT(p != NULL);
- clear_page(p);
- sh2_unmap_domain_page(p);
- perfc_incr(shadow2_alloc_count);
- }
- return page_to_mfn(pg);
- }
-
- /* If we get here, we failed to allocate. This should never happen.
- * It means that we didn't call shadow2_prealloc() correctly before
- * we allocated. We can't recover by calling prealloc here, because
- * we might free up higher-level pages that the caller is working on. */
- SHADOW2_PRINTK("Can't allocate %i shadow pages!\n", 1 << order);
- BUG();
-}
-
-
-/* Return some shadow pages to the pool. */
-void shadow2_free(struct domain *d, mfn_t smfn)
-{
- struct page_info *pg = mfn_to_page(smfn);
- u32 shadow_type;
- unsigned long order;
- unsigned long mask;
- int i;
-
- ASSERT(shadow2_lock_is_acquired(d));
- perfc_incrc(shadow2_free);
-
- shadow_type = pg->count_info & PGC_SH2_type_mask;
- ASSERT(shadow_type != PGC_SH2_none);
- ASSERT(shadow_type != PGC_SH2_p2m_table);
- order = shadow_order(shadow_type);
-
- d->arch.shadow2.free_pages += 1 << order;
-
- for ( i = 0; i < 1<<order; i++ )
- {
- /* Strip out the type: this is now a free shadow page */
- pg[i].count_info = 0;
- /* Remember the TLB timestamp so we will know whether to flush
- * TLBs when we reuse the page. Because the destructors leave the
- * contents of the pages in place, we can delay TLB flushes until
- * just before the allocator hands the page out again. */
- pg[i].tlbflush_timestamp = tlbflush_current_time();
- perfc_decr(shadow2_alloc_count);
- }
-
- /* Merge chunks as far as possible. */
- while ( order < SHADOW2_MAX_ORDER )
- {
- mask = 1 << order;
- if ( (mfn_x(page_to_mfn(pg)) & mask) ) {
- /* Merge with predecessor block? */
- if ( (((pg-mask)->count_info & PGC_SH2_type_mask) != PGT_none)
- || (SH2_PFN_ORDER(pg-mask) != order) )
- break;
- list_del(&(pg-mask)->list);
- pg -= mask;
- } else {
- /* Merge with successor block? */
- if ( (((pg+mask)->count_info & PGC_SH2_type_mask) != PGT_none)
- || (SH2_PFN_ORDER(pg+mask) != order) )
- break;
- list_del(&(pg+mask)->list);
- }
- order++;
- }
-
- SH2_SET_PFN_ORDER(pg, order);
- list_add_tail(&pg->list, &d->arch.shadow2.freelists[order]);
-}
-
-/* Divert some memory from the pool to be used by the p2m mapping.
- * This action is irreversible: the p2m mapping only ever grows.
- * That's OK because the p2m table only exists for external domains,
- * and those domains can't ever turn off shadow mode.
- * Also, we only ever allocate a max-order chunk, so as to preserve
- * the invariant that shadow2_prealloc() always works.
- * Returns 0 iff it can't get a chunk (the caller should then
- * free up some pages in domheap and call set_sh2_allocation);
- * returns non-zero on success.
- */
-static int
-shadow2_alloc_p2m_pages(struct domain *d)
-{
- struct page_info *pg;
- u32 i;
- ASSERT(shadow2_lock_is_acquired(d));
-
- if ( d->arch.shadow2.total_pages
- < (shadow2_min_acceptable_pages(d) + (1<<SHADOW2_MAX_ORDER)) )
- return 0; /* Not enough shadow memory: need to increase it first */
-
- pg = mfn_to_page(shadow2_alloc(d, PGC_SH2_p2m_table, 0));
- d->arch.shadow2.p2m_pages += (1<<SHADOW2_MAX_ORDER);
- d->arch.shadow2.total_pages -= (1<<SHADOW2_MAX_ORDER);
- for (i = 0; i < (1<<SHADOW2_MAX_ORDER); i++)
- {
- /* Unlike shadow pages, mark p2m pages as owned by the domain */
- page_set_owner(&pg[i], d);
- list_add_tail(&pg[i].list, &d->arch.shadow2.p2m_freelist);
- }
- return 1;
-}
-
-// Returns 0 if no memory is available...
-mfn_t
-shadow2_alloc_p2m_page(struct domain *d)
-{
- struct list_head *entry;
- mfn_t mfn;
- void *p;
-
- if ( list_empty(&d->arch.shadow2.p2m_freelist) &&
- !shadow2_alloc_p2m_pages(d) )
- return _mfn(0);
- entry = d->arch.shadow2.p2m_freelist.next;
- list_del(entry);
- list_add_tail(entry, &d->arch.shadow2.p2m_inuse);
- mfn = page_to_mfn(list_entry(entry, struct page_info, list));
- sh2_get_ref(mfn, 0);
- p = sh2_map_domain_page(mfn);
- clear_page(p);
- sh2_unmap_domain_page(p);
-
- return mfn;
-}
-
-#if CONFIG_PAGING_LEVELS == 3
-static void p2m_install_entry_in_monitors(struct domain *d,
- l3_pgentry_t *l3e)
-/* Special case, only used for external-mode domains on PAE hosts:
- * update the mapping of the p2m table. Once again, this is trivial in
- * other paging modes (one top-level entry points to the top-level p2m,
- * no maintenance needed), but PAE makes life difficult by needing a
- * copy the eight l3es of the p2m table in eight l2h slots in the
- * monitor table. This function makes fresh copies when a p2m l3e
- * changes. */
-{
- l2_pgentry_t *ml2e;
- struct vcpu *v;
- unsigned int index;
-
- index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
- ASSERT(index < MACHPHYS_MBYTES>>1);
-
- for_each_vcpu(d, v)
- {
- if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
- continue;
- ASSERT(shadow2_mode_external(v->domain));
-
- SHADOW2_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
- d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
-
- if ( v == current ) /* OK to use linear map of monitor_table */
- ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
- else
- {
- l3_pgentry_t *ml3e;
- ml3e = sh2_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
- ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
- ml2e = sh2_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
- ml2e += l2_table_offset(RO_MPT_VIRT_START);
- sh2_unmap_domain_page(ml3e);
- }
- ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
- if ( v != current )
- sh2_unmap_domain_page(ml2e);
- }
-}
-#endif
-
-// Find the next level's P2M entry, checking for out-of-range gfn's...
-// Returns NULL on error.
-//
-static l1_pgentry_t *
-p2m_find_entry(void *table, unsigned long *gfn_remainder,
- unsigned long gfn, u32 shift, u32 max)
-{
- u32 index;
-
- index = *gfn_remainder >> shift;
- if ( index >= max )
- {
- SHADOW2_DEBUG(P2M, "gfn=0x%lx out of range "
- "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
- gfn, *gfn_remainder, shift, index, max);
- return NULL;
- }
- *gfn_remainder &= (1 << shift) - 1;
- return (l1_pgentry_t *)table + index;
-}
-
-// Walk one level of the P2M table, allocating a new table if required.
-// Returns 0 on error.
-//
-static int
-p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
- unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
- u32 max, unsigned long type)
-{
- l1_pgentry_t *p2m_entry;
- void *next;
-
- if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
- shift, max)) )
- return 0;
-
- if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
- {
- mfn_t mfn = shadow2_alloc_p2m_page(d);
- if ( mfn_x(mfn) == 0 )
- return 0;
- *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
- mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated;
- mfn_to_page(mfn)->count_info = 1;
-#if CONFIG_PAGING_LEVELS == 3
- if (type == PGT_l2_page_table)
- {
- /* We have written to the p2m l3: need to sync the per-vcpu
- * copies of it in the monitor tables */
- p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
- }
-#endif
- /* The P2M can be shadowed: keep the shadows synced */
- if ( d->vcpu[0] )
- (void)__shadow2_validate_guest_entry(d->vcpu[0], *table_mfn,
- p2m_entry, sizeof *p2m_entry);
- }
- *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
- next = sh2_map_domain_page(*table_mfn);
- sh2_unmap_domain_page(*table);
- *table = next;
-
- return 1;
-}
-
-// Returns 0 on error (out of memory)
-int
-shadow2_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
-{
- // XXX -- this might be able to be faster iff current->domain == d
- mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
- void *table = sh2_map_domain_page(table_mfn);
- unsigned long gfn_remainder = gfn;
- l1_pgentry_t *p2m_entry;
-
-#if CONFIG_PAGING_LEVELS >= 4
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L4_PAGETABLE_SHIFT - PAGE_SHIFT,
- L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
- return 0;
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- // When using PAE Xen, we only allow 33 bits of pseudo-physical
- // address in translated guests (i.e. 8 GBytes). This restriction
- // comes from wanting to map the P2M table into the 16MB RO_MPT hole
- // in Xen's address space for translated PV guests.
- //
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L3_PAGETABLE_SHIFT - PAGE_SHIFT,
- (CONFIG_PAGING_LEVELS == 3
- ? 8
- : L3_PAGETABLE_ENTRIES),
- PGT_l2_page_table) )
- return 0;
-#endif
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- return 0;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
- if ( valid_mfn(mfn) )
- *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
- else
- *p2m_entry = l1e_empty();
-
- /* The P2M can be shadowed: keep the shadows synced */
- (void) __shadow2_validate_guest_entry(d->vcpu[0], table_mfn,
- p2m_entry, sizeof *p2m_entry);
-
- sh2_unmap_domain_page(table);
-
- return 1;
-}
-
-// Allocate a new p2m table for a domain.
-//
-// The structure of the p2m table is that of a pagetable for xen (i.e. it is
-// controlled by CONFIG_PAGING_LEVELS).
-//
-// Returns 0 if p2m table could not be initialized
-//
-static int
-shadow2_alloc_p2m_table(struct domain *d)
-{
- mfn_t p2m_top;
- struct list_head *entry;
- unsigned int page_count = 0;
-
- SHADOW2_PRINTK("allocating p2m table\n");
- ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0);
-
- p2m_top = shadow2_alloc_p2m_page(d);
- mfn_to_page(p2m_top)->count_info = 1;
- mfn_to_page(p2m_top)->u.inuse.type_info =
-#if CONFIG_PAGING_LEVELS == 4
- PGT_l4_page_table
-#elif CONFIG_PAGING_LEVELS == 3
- PGT_l3_page_table
-#elif CONFIG_PAGING_LEVELS == 2
- PGT_l2_page_table
-#endif
- | 1 | PGT_validated;
-
- if ( mfn_x(p2m_top) == 0 )
- return 0;
-
- d->arch.phys_table = pagetable_from_mfn(p2m_top);
-
- SHADOW2_PRINTK("populating p2m table\n");
-
- for ( entry = d->page_list.next;
- entry != &d->page_list;
- entry = entry->next )
- {
- struct page_info *page = list_entry(entry, struct page_info, list);
- mfn_t mfn = page_to_mfn(page);
- unsigned long gfn = get_gpfn_from_mfn(mfn_x(mfn));
- page_count++;
- if (
-#ifdef __x86_64__
- (gfn != 0x5555555555555555L)
-#else
- (gfn != 0x55555555L)
-#endif
- && gfn != INVALID_M2P_ENTRY
- && !shadow2_set_p2m_entry(d, gfn, mfn) )
- {
- SHADOW2_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" SH2_PRI_mfn "\n",
- gfn, mfn_x(mfn));
- return 0;
- }
- }
-
- SHADOW2_PRINTK("p2m table initialised (%u pages)\n", page_count);
- return 1;
-}
-
-mfn_t
-sh2_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
-/* Read another domain's p2m entries */
-{
- mfn_t mfn;
- unsigned long addr = gpfn << PAGE_SHIFT;
- l2_pgentry_t *l2e;
- l1_pgentry_t *l1e;
-
- ASSERT(shadow2_mode_translate(d));
- mfn = pagetable_get_mfn(d->arch.phys_table);
-
-
-#if CONFIG_PAGING_LEVELS > 2
- if ( gpfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) )
- /* This pfn is higher than the p2m map can hold */
- return _mfn(INVALID_MFN);
-#endif
-
-
-#if CONFIG_PAGING_LEVELS >= 4
- {
- l4_pgentry_t *l4e = sh2_map_domain_page(mfn);
- l4e += l4_table_offset(addr);
- if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
- {
- sh2_unmap_domain_page(l4e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l4e_get_pfn(*l4e));
- sh2_unmap_domain_page(l4e);
- }
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- {
- l3_pgentry_t *l3e = sh2_map_domain_page(mfn);
- l3e += l3_table_offset(addr);
- if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
- {
- sh2_unmap_domain_page(l3e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l3e_get_pfn(*l3e));
- sh2_unmap_domain_page(l3e);
- }
-#endif
-
- l2e = sh2_map_domain_page(mfn);
- l2e += l2_table_offset(addr);
- if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
- {
- sh2_unmap_domain_page(l2e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l2e_get_pfn(*l2e));
- sh2_unmap_domain_page(l2e);
-
- l1e = sh2_map_domain_page(mfn);
- l1e += l1_table_offset(addr);
- if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
- {
- sh2_unmap_domain_page(l1e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l1e_get_pfn(*l1e));
- sh2_unmap_domain_page(l1e);
-
- return mfn;
-}
-
-unsigned long
-shadow2_gfn_to_mfn_foreign(unsigned long gpfn)
-{
- return mfn_x(sh2_gfn_to_mfn_foreign(current->domain, gpfn));
-}
-
-
-static void shadow2_p2m_teardown(struct domain *d)
-/* Return all the p2m pages to Xen.
- * We know we don't have any extra mappings to these pages */
-{
- struct list_head *entry, *n;
- struct page_info *pg;
-
- d->arch.phys_table = pagetable_null();
-
- list_for_each_safe(entry, n, &d->arch.shadow2.p2m_inuse)
- {
- pg = list_entry(entry, struct page_info, list);
- list_del(entry);
- /* Should have just the one ref we gave it in alloc_p2m_page() */
- if ( (pg->count_info & PGC_SH2_count_mask) != 1 )
- {
- SHADOW2_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n",
- pg->count_info, pg->u.inuse.type_info);
- }
- ASSERT(page_get_owner(pg) == d);
- /* Free should not decrement domain's total allocation, since
- * these pages were allocated without an owner. */
- page_set_owner(pg, NULL);
- free_domheap_pages(pg, 0);
- d->arch.shadow2.p2m_pages--;
- perfc_decr(shadow2_alloc_count);
- }
- list_for_each_safe(entry, n, &d->arch.shadow2.p2m_freelist)
- {
- list_del(entry);
- pg = list_entry(entry, struct page_info, list);
- ASSERT(page_get_owner(pg) == d);
- /* Free should not decrement domain's total allocation. */
- page_set_owner(pg, NULL);
- free_domheap_pages(pg, 0);
- d->arch.shadow2.p2m_pages--;
- perfc_decr(shadow2_alloc_count);
- }
- ASSERT(d->arch.shadow2.p2m_pages == 0);
-}
-
-/* Set the pool of shadow pages to the required number of pages.
- * Input will be rounded up to at least shadow2_min_acceptable_pages(),
- * plus space for the p2m table.
- * Returns 0 for success, non-zero for failure. */
-static unsigned int set_sh2_allocation(struct domain *d,
- unsigned int pages,
- int *preempted)
-{
- struct page_info *pg;
- unsigned int lower_bound;
- int j;
-
- ASSERT(shadow2_lock_is_acquired(d));
-
- /* Don't allocate less than the minimum acceptable, plus one page per
- * megabyte of RAM (for the p2m table) */
- lower_bound = shadow2_min_acceptable_pages(d) + (d->tot_pages / 256);
- if ( pages > 0 && pages < lower_bound )
- pages = lower_bound;
- /* Round up to largest block size */
- pages = (pages + ((1<<SHADOW2_MAX_ORDER)-1)) & ~((1<<SHADOW2_MAX_ORDER)-1);
-
- SHADOW2_PRINTK("current %i target %i\n",
- d->arch.shadow2.total_pages, pages);
-
- while ( d->arch.shadow2.total_pages != pages )
- {
- if ( d->arch.shadow2.total_pages < pages )
- {
- /* Need to allocate more memory from domheap */
- pg = alloc_domheap_pages(NULL, SHADOW2_MAX_ORDER, 0);
- if ( pg == NULL )
- {
- SHADOW2_PRINTK("failed to allocate shadow pages.\n");
- return -ENOMEM;
- }
- d->arch.shadow2.free_pages += 1<<SHADOW2_MAX_ORDER;
- d->arch.shadow2.total_pages += 1<<SHADOW2_MAX_ORDER;
- for ( j = 0; j < 1<<SHADOW2_MAX_ORDER; j++ )
- {
- pg[j].u.inuse.type_info = 0; /* Free page */
- pg[j].tlbflush_timestamp = 0; /* Not in any TLB */
- }
- SH2_SET_PFN_ORDER(pg, SHADOW2_MAX_ORDER);
- list_add_tail(&pg->list,
- &d->arch.shadow2.freelists[SHADOW2_MAX_ORDER]);
- }
- else if ( d->arch.shadow2.total_pages > pages )
- {
- /* Need to return memory to domheap */
- shadow2_prealloc(d, SHADOW2_MAX_ORDER);
- ASSERT(!list_empty(&d->arch.shadow2.freelists[SHADOW2_MAX_ORDER]));
- pg = list_entry(d->arch.shadow2.freelists[SHADOW2_MAX_ORDER].next,
- struct page_info, list);
- list_del(&pg->list);
- d->arch.shadow2.free_pages -= 1<<SHADOW2_MAX_ORDER;
- d->arch.shadow2.total_pages -= 1<<SHADOW2_MAX_ORDER;
- free_domheap_pages(pg, SHADOW2_MAX_ORDER);
- }
-
- /* Check to see if we need to yield and try again */
- if ( preempted && hypercall_preempt_check() )
- {
- *preempted = 1;
- return 0;
- }
- }
-
- return 0;
-}
-
-unsigned int shadow2_set_allocation(struct domain *d,
- unsigned int megabytes,
- int *preempted)
-/* Hypercall interface to set the shadow memory allocation */
-{
- unsigned int rv;
- shadow2_lock(d);
- rv = set_sh2_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted);
- SHADOW2_PRINTK("dom %u allocation now %u pages (%u MB)\n",
- d->domain_id,
- d->arch.shadow2.total_pages,
- shadow2_get_allocation(d));
- shadow2_unlock(d);
- return rv;
-}
-
-/**************************************************************************/
-/* Hash table for storing the guest->shadow mappings */
-
-/* Hash function that takes a gfn or mfn, plus another byte of type info */
-typedef u32 key_t;
-static inline key_t sh2_hash(unsigned long n, u8 t)
-{
- unsigned char *p = (unsigned char *)&n;
- key_t k = t;
- int i;
- for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
- return k;
-}
-
-#if SHADOW2_AUDIT & (SHADOW2_AUDIT_HASH|SHADOW2_AUDIT_HASH_FULL)
-
-/* Before we get to the mechanism, define a pair of audit functions
- * that sanity-check the contents of the hash table. */
-static void sh2_hash_audit_bucket(struct domain *d, int bucket)
-/* Audit one bucket of the hash table */
-{
- struct shadow2_hash_entry *e, *x;
- struct page_info *pg;
-
- if ( !(SHADOW2_AUDIT_ENABLE) )
- return;
-
- e = &d->arch.shadow2.hash_table[bucket];
- if ( e->t == 0 ) return; /* Bucket is empty */
- while ( e )
- {
- /* Empty link? */
- BUG_ON( e->t == 0 );
- /* Bogus type? */
- BUG_ON( e->t > (PGC_SH2_max_shadow >> PGC_SH2_type_shift) );
- /* Wrong bucket? */
- BUG_ON( sh2_hash(e->n, e->t) % SHADOW2_HASH_BUCKETS != bucket );
- /* Duplicate entry? */
- for ( x = e->next; x; x = x->next )
- BUG_ON( x->n == e->n && x->t == e->t );
- /* Bogus MFN? */
- BUG_ON( !valid_mfn(e->smfn) );
- pg = mfn_to_page(e->smfn);
- /* Not a shadow? */
- BUG_ON( page_get_owner(pg) != 0 );
- /* Wrong kind of shadow? */
- BUG_ON( (pg->count_info & PGC_SH2_type_mask) >> PGC_SH2_type_shift
- != e->t );
- /* Bad backlink? */
- BUG_ON( pg->u.inuse.type_info != e->n );
- if ( e->t != (PGC_SH2_fl1_32_shadow >> PGC_SH2_type_shift)
- && e->t != (PGC_SH2_fl1_pae_shadow >> PGC_SH2_type_shift)
- && e->t != (PGC_SH2_fl1_64_shadow >> PGC_SH2_type_shift) )
- {
- /* Bad shadow flags on guest page? */
- BUG_ON( !(mfn_to_page(_mfn(e->n))->shadow2_flags & (1<<e->t)) );
- }
- /* That entry was OK; on we go */
- e = e->next;
- }
-}
-
-#else
-#define sh2_hash_audit_bucket(_d, _b)
-#endif /* Hashtable bucket audit */
-
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_HASH_FULL
-
-static void sh2_hash_audit(struct domain *d)
-/* Full audit: audit every bucket in the table */
-{
- int i;
-
- if ( !(SHADOW2_AUDIT_ENABLE) )
- return;
-
- for ( i = 0; i < SHADOW2_HASH_BUCKETS; i++ )
- {
- sh2_hash_audit_bucket(d, i);
- }
-}
-
-#else
-#define sh2_hash_audit(_d)
-#endif /* Hashtable bucket audit */
-
-/* Memory management interface for bucket allocation.
- * These ought to come out of shadow memory, but at least on 32-bit
- * machines we are forced to allocate them from xenheap so that we can
- * address them. */
-static struct shadow2_hash_entry *sh2_alloc_hash_entry(struct domain *d)
-{
- struct shadow2_hash_entry *extra, *x;
- int i;
-
- /* We need to allocate a new node. Ensure the free list is not empty.
- * Allocate new entries in units the same size as the original table. */
- if ( unlikely(d->arch.shadow2.hash_freelist == NULL) )
- {
- size_t sz = sizeof(void *) + (SHADOW2_HASH_BUCKETS * sizeof(*x));
- extra = xmalloc_bytes(sz);
-
- if ( extra == NULL )
- {
- /* No memory left! */
- SHADOW2_ERROR("xmalloc() failed when allocating hash buckets.\n");
- domain_crash_synchronous();
- }
- memset(extra, 0, sz);
-
- /* Record the allocation block so it can be correctly freed later. */
- *((struct shadow2_hash_entry **)&extra[SHADOW2_HASH_BUCKETS]) =
- d->arch.shadow2.hash_allocations;
- d->arch.shadow2.hash_allocations = &extra[0];
-
- /* Thread a free chain through the newly-allocated nodes. */
- for ( i = 0; i < (SHADOW2_HASH_BUCKETS - 1); i++ )
- extra[i].next = &extra[i+1];
- extra[i].next = NULL;
-
- /* Add the new nodes to the free list. */
- d->arch.shadow2.hash_freelist = &extra[0];
- }
-
- /* Allocate a new node from the free list. */
- x = d->arch.shadow2.hash_freelist;
- d->arch.shadow2.hash_freelist = x->next;
- return x;
-}
-
-static void sh2_free_hash_entry(struct domain *d, struct shadow2_hash_entry *e)
-{
- /* Mark the bucket as empty and return it to the free list */
- e->t = 0;
- e->next = d->arch.shadow2.hash_freelist;
- d->arch.shadow2.hash_freelist = e;
-}
-
-
-/* Allocate and initialise the table itself.
- * Returns 0 for success, 1 for error. */
-static int shadow2_hash_alloc(struct domain *d)
-{
- struct shadow2_hash_entry *table;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(!d->arch.shadow2.hash_table);
-
- table = xmalloc_array(struct shadow2_hash_entry, SHADOW2_HASH_BUCKETS);
- if ( !table ) return 1;
- memset(table, 0,
- SHADOW2_HASH_BUCKETS * sizeof (struct shadow2_hash_entry));
- d->arch.shadow2.hash_table = table;
- return 0;
-}
-
-/* Tear down the hash table and return all memory to Xen.
- * This function does not care whether the table is populated. */
-static void shadow2_hash_teardown(struct domain *d)
-{
- struct shadow2_hash_entry *a, *n;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(d->arch.shadow2.hash_table);
-
- /* Return the table itself */
- xfree(d->arch.shadow2.hash_table);
- d->arch.shadow2.hash_table = NULL;
-
- /* Return any extra allocations */
- a = d->arch.shadow2.hash_allocations;
- while ( a )
- {
- /* We stored a linked-list pointer at the end of each allocation */
- n = *((struct shadow2_hash_entry **)(&a[SHADOW2_HASH_BUCKETS]));
- xfree(a);
- a = n;
- }
- d->arch.shadow2.hash_allocations = NULL;
- d->arch.shadow2.hash_freelist = NULL;
-}
-
-
-mfn_t shadow2_hash_lookup(struct vcpu *v, unsigned long n, u8 t)
-/* Find an entry in the hash table. Returns the MFN of the shadow,
- * or INVALID_MFN if it doesn't exist */
-{
- struct domain *d = v->domain;
- struct shadow2_hash_entry *p, *x, *head;
- key_t key;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(d->arch.shadow2.hash_table);
- ASSERT(t);
-
- sh2_hash_audit(d);
-
- perfc_incrc(shadow2_hash_lookups);
- key = sh2_hash(n, t);
-
- x = head = &d->arch.shadow2.hash_table[key % SHADOW2_HASH_BUCKETS];
- p = NULL;
-
- sh2_hash_audit_bucket(d, key % SHADOW2_HASH_BUCKETS);
-
- do
- {
- ASSERT(x->t || ((x == head) && (x->next == NULL)));
-
- if ( x->n == n && x->t == t )
- {
- /* Pull-to-front if 'x' isn't already the head item */
- if ( unlikely(x != head) )
- {
- if ( unlikely(d->arch.shadow2.hash_walking != 0) )
- /* Can't reorder: someone is walking the hash chains */
- return x->smfn;
- else
- {
- /* Delete 'x' from list and reinsert after head. */
- p->next = x->next;
- x->next = head->next;
- head->next = x;
-
- /* Swap 'x' contents with head contents. */
- SWAP(head->n, x->n);
- SWAP(head->t, x->t);
- SWAP(head->smfn, x->smfn);
- }
- }
- else
- {
- perfc_incrc(shadow2_hash_lookup_head);
- }
- return head->smfn;
- }
-
- p = x;
- x = x->next;
- }
- while ( x != NULL );
-
- perfc_incrc(shadow2_hash_lookup_miss);
- return _mfn(INVALID_MFN);
-}
-
-void shadow2_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
-/* Put a mapping (n,t)->smfn into the hash table */
-{
- struct domain *d = v->domain;
- struct shadow2_hash_entry *x, *head;
- key_t key;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(d->arch.shadow2.hash_table);
- ASSERT(t);
-
- sh2_hash_audit(d);
-
- perfc_incrc(shadow2_hash_inserts);
- key = sh2_hash(n, t);
-
- head = &d->arch.shadow2.hash_table[key % SHADOW2_HASH_BUCKETS];
-
- sh2_hash_audit_bucket(d, key % SHADOW2_HASH_BUCKETS);
-
- /* If the bucket is empty then insert the new page as the head item. */
- if ( head->t == 0 )
- {
- head->n = n;
- head->t = t;
- head->smfn = smfn;
- ASSERT(head->next == NULL);
- }
- else
- {
- /* Insert a new entry directly after the head item. */
- x = sh2_alloc_hash_entry(d);
- x->n = n;
- x->t = t;
- x->smfn = smfn;
- x->next = head->next;
- head->next = x;
- }
-
- sh2_hash_audit_bucket(d, key % SHADOW2_HASH_BUCKETS);
-}
-
-void shadow2_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
-/* Excise the mapping (n,t)->smfn from the hash table */
-{
- struct domain *d = v->domain;
- struct shadow2_hash_entry *p, *x, *head;
- key_t key;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(d->arch.shadow2.hash_table);
- ASSERT(t);
-
- sh2_hash_audit(d);
-
- perfc_incrc(shadow2_hash_deletes);
- key = sh2_hash(n, t);
-
- head = &d->arch.shadow2.hash_table[key % SHADOW2_HASH_BUCKETS];
-
- sh2_hash_audit_bucket(d, key % SHADOW2_HASH_BUCKETS);
-
- /* Match on head item? */
- if ( head->n == n && head->t == t )
- {
- if ( (x = head->next) != NULL )
- {
- /* Overwrite head with contents of following node. */
- head->n = x->n;
- head->t = x->t;
- head->smfn = x->smfn;
-
- /* Delete following node. */
- head->next = x->next;
- sh2_free_hash_entry(d, x);
- }
- else
- {
- /* This bucket is now empty. Initialise the head node. */
- head->t = 0;
- }
- }
- else
- {
- /* Not at the head; need to walk the chain */
- p = head;
- x = head->next;
-
- while(1)
- {
- ASSERT(x); /* We can't have hit the end, since our target is
- * still in the chain somehwere... */
- if ( x->n == n && x->t == t )
- {
- /* Delete matching node. */
- p->next = x->next;
- sh2_free_hash_entry(d, x);
- break;
- }
- p = x;
- x = x->next;
- }
- }
-
- sh2_hash_audit_bucket(d, key % SHADOW2_HASH_BUCKETS);
-}
-
-typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
-
-static void hash_foreach(struct vcpu *v,
- unsigned int callback_mask,
- hash_callback_t callbacks[],
- mfn_t callback_mfn)
-/* Walk the hash table looking at the types of the entries and
- * calling the appropriate callback function for each entry.
- * The mask determines which shadow types we call back for, and the array
- * of callbacks tells us which function to call.
- * Any callback may return non-zero to let us skip the rest of the scan.
- *
- * WARNING: Callbacks MUST NOT add or remove hash entries unless they
- * then return non-zero to terminate the scan. */
-{
- int i, done = 0;
- struct domain *d = v->domain;
- struct shadow2_hash_entry *x;
-
- /* Say we're here, to stop hash-lookups reordering the chains */
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(d->arch.shadow2.hash_walking == 0);
- d->arch.shadow2.hash_walking = 1;
-
- callback_mask &= ~1; /* Never attempt to call back on empty buckets */
- for ( i = 0; i < SHADOW2_HASH_BUCKETS; i++ )
- {
- /* WARNING: This is not safe against changes to the hash table.
- * The callback *must* return non-zero if it has inserted or
- * deleted anything from the hash (lookups are OK, though). */
- for ( x = &d->arch.shadow2.hash_table[i]; x; x = x->next )
- {
- if ( callback_mask & (1 << x->t) )
- {
- ASSERT(x->t <= 15);
- ASSERT(callbacks[x->t] != NULL);
- if ( (done = callbacks[x->t](v, x->smfn, callback_mfn)) != 0 )
- break;
- }
- }
- if ( done ) break;
- }
- d->arch.shadow2.hash_walking = 0;
-}
-
-
-/**************************************************************************/
-/* Destroy a shadow page: simple dispatcher to call the per-type destructor
- * which will decrement refcounts appropriately and return memory to the
- * free pool. */
-
-void sh2_destroy_shadow(struct vcpu *v, mfn_t smfn)
-{
- struct page_info *pg = mfn_to_page(smfn);
- u32 t = pg->count_info & PGC_SH2_type_mask;
-
-
- SHADOW2_PRINTK("smfn=%#lx\n", mfn_x(smfn));
-
- /* Double-check, if we can, that the shadowed page belongs to this
- * domain, (by following the back-pointer). */
- ASSERT(t == PGC_SH2_fl1_32_shadow ||
- t == PGC_SH2_fl1_pae_shadow ||
- t == PGC_SH2_fl1_64_shadow ||
- t == PGC_SH2_monitor_table ||
- (page_get_owner(mfn_to_page(_mfn(pg->u.inuse.type_info)))
- == v->domain));
-
- /* The down-shifts here are so that the switch statement is on nice
- * small numbers that the compiler will enjoy */
- switch ( t >> PGC_SH2_type_shift )
- {
-#if CONFIG_PAGING_LEVELS == 2
- case PGC_SH2_l1_32_shadow >> PGC_SH2_type_shift:
- case PGC_SH2_fl1_32_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, 2, 2)(v, smfn);
- break;
- case PGC_SH2_l2_32_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, 2, 2)(v, smfn);
- break;
-#else /* PAE or 64bit */
- case PGC_SH2_l1_32_shadow >> PGC_SH2_type_shift:
- case PGC_SH2_fl1_32_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, 3, 2)(v, smfn);
- break;
- case PGC_SH2_l2_32_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, 3, 2)(v, smfn);
- break;
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
- case PGC_SH2_l1_pae_shadow >> PGC_SH2_type_shift:
- case PGC_SH2_fl1_pae_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, 3, 3)(v, smfn);
- break;
- case PGC_SH2_l2_pae_shadow >> PGC_SH2_type_shift:
- case PGC_SH2_l2h_pae_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, 3, 3)(v, smfn);
- break;
- case PGC_SH2_l3_pae_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l3_shadow, 3, 3)(v, smfn);
- break;
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
- case PGC_SH2_l1_64_shadow >> PGC_SH2_type_shift:
- case PGC_SH2_fl1_64_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, 4, 4)(v, smfn);
- break;
- case PGC_SH2_l2_64_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, 4, 4)(v, smfn);
- break;
- case PGC_SH2_l3_64_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l3_shadow, 4, 4)(v, smfn);
- break;
- case PGC_SH2_l4_64_shadow >> PGC_SH2_type_shift:
- SHADOW2_INTERNAL_NAME(sh2_destroy_l4_shadow, 4, 4)(v, smfn);
- break;
-#endif
- default:
- SHADOW2_PRINTK("tried to destroy shadow of bad type %08lx\n",
- (unsigned long)t);
- BUG();
- }
-}
-
-/**************************************************************************/
-/* Remove all writeable mappings of a guest frame from the shadow tables
- * Returns non-zero if we need to flush TLBs.
- * level and fault_addr desribe how we found this to be a pagetable;
- * level==0 means we have some other reason for revoking write access.*/
-
-int shadow2_remove_write_access(struct vcpu *v, mfn_t gmfn,
- unsigned int level,
- unsigned long fault_addr)
-{
- /* Dispatch table for getting per-type functions */
- static hash_callback_t callbacks[16] = {
- NULL, /* none */
-#if CONFIG_PAGING_LEVELS == 2
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,2,2), /* l1_32 */
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,2,2), /* fl1_32 */
-#else
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,3,2), /* l1_32 */
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,3,2), /* fl1_32 */
-#endif
- NULL, /* l2_32 */
-#if CONFIG_PAGING_LEVELS >= 3
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,3,3), /* l1_pae */
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,3,3), /* fl1_pae */
-#else
- NULL, /* l1_pae */
- NULL, /* fl1_pae */
-#endif
- NULL, /* l2_pae */
- NULL, /* l2h_pae */
- NULL, /* l3_pae */
-#if CONFIG_PAGING_LEVELS >= 4
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,4,4), /* l1_64 */
- SHADOW2_INTERNAL_NAME(sh2_remove_write_access,4,4), /* fl1_64 */
-#else
- NULL, /* l1_64 */
- NULL, /* fl1_64 */
-#endif
- NULL, /* l2_64 */
- NULL, /* l3_64 */
- NULL, /* l4_64 */
- NULL, /* p2m */
- NULL /* unused */
- };
-
- static unsigned int callback_mask =
- 1 << (PGC_SH2_l1_32_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_fl1_32_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_l1_pae_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_fl1_pae_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_l1_64_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_fl1_64_shadow >> PGC_SH2_type_shift)
- ;
- struct page_info *pg = mfn_to_page(gmfn);
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
-
- /* Only remove writable mappings if we are doing shadow refcounts.
- * In guest refcounting, we trust Xen to already be restricting
- * all the writes to the guest page tables, so we do not need to
- * do more. */
- if ( !shadow2_mode_refcounts(v->domain) )
- return 0;
-
- /* Early exit if it's already a pagetable, or otherwise not writeable */
- if ( sh2_mfn_is_a_page_table(gmfn)
- || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
- return 0;
-
- perfc_incrc(shadow2_writeable);
-
- /* If this isn't a "normal" writeable page, the domain is trying to
- * put pagetables in special memory of some kind. We can't allow that. */
- if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
- {
- SHADOW2_ERROR("can't remove write access to mfn %lx, type_info is %"
- PRtype_info "\n",
- mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
- domain_crash(v->domain);
- }
-
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_WRITABLE_HEURISTIC
- if ( v == current && level != 0 )
- {
- unsigned long gfn;
- /* Heuristic: there is likely to be only one writeable mapping,
- * and that mapping is likely to be in the current pagetable,
- * either in the guest's linear map (linux, windows) or in a
- * magic slot used to map high memory regions (linux HIGHTPTE) */
-
-#define GUESS(_a, _h) do { \
- if ( v->arch.shadow2.mode->guess_wrmap(v, (_a), gmfn) ) \
- perfc_incrc(shadow2_writeable_h_ ## _h); \
- if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \
- return 1; \
- } while (0)
-
-
- /* Linux lowmem: first 1GB is mapped 1-to-1 above 0xC0000000 */
- if ( v == current
- && (gfn = sh2_mfn_to_gfn(v->domain, gmfn)) < 0x40000000 )
- GUESS(0xC0000000 + (gfn << PAGE_SHIFT), 4);
-
- if ( v->arch.shadow2.mode->guest_levels == 2 )
- {
- if ( level == 1 )
- /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
- GUESS(0xC0000000UL + (fault_addr >> 10), 1);
- }
-#if CONFIG_PAGING_LEVELS >= 3
- else if ( v->arch.shadow2.mode->guest_levels == 3 )
- {
- /* 32bit PAE w2k3: linear map at 0xC0000000 */
- switch ( level )
- {
- case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
- case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
- }
- }
-#if CONFIG_PAGING_LEVELS >= 4
- else if ( v->arch.shadow2.mode->guest_levels == 4 )
- {
- /* 64bit w2k3: linear map at 0x0000070000000000 */
- switch ( level )
- {
- case 1: GUESS(0x70000000000UL + (fault_addr >> 9), 3); break;
- case 2: GUESS(0x70380000000UL + (fault_addr >> 18), 3); break;
- case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break;
- }
- }
-#endif /* CONFIG_PAGING_LEVELS >= 4 */
-#endif /* CONFIG_PAGING_LEVELS >= 3 */
-
-#undef GUESS
-
- }
-#endif
-
- /* Brute-force search of all the shadows, by walking the hash */
- perfc_incrc(shadow2_writeable_bf);
- hash_foreach(v, callback_mask, callbacks, gmfn);
-
- /* If that didn't catch the mapping, something is very wrong */
- if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
- {
- SHADOW2_ERROR("can't find all writeable mappings of mfn %lx: "
- "%lu left\n", mfn_x(gmfn),
- (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
- domain_crash(v->domain);
- }
-
- /* We killed at least one writeable mapping, so must flush TLBs. */
- return 1;
-}
-
-
-
-/**************************************************************************/
-/* Remove all mappings of a guest frame from the shadow tables.
- * Returns non-zero if we need to flush TLBs. */
-
-int shadow2_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
-{
- struct page_info *page = mfn_to_page(gmfn);
- int expected_count;
-
- /* Dispatch table for getting per-type functions */
- static hash_callback_t callbacks[16] = {
- NULL, /* none */
-#if CONFIG_PAGING_LEVELS == 2
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,2,2), /* l1_32 */
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,2,2), /* fl1_32 */
-#else
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,3,2), /* l1_32 */
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,3,2), /* fl1_32 */
-#endif
- NULL, /* l2_32 */
-#if CONFIG_PAGING_LEVELS >= 3
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,3,3), /* l1_pae */
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,3,3), /* fl1_pae */
-#else
- NULL, /* l1_pae */
- NULL, /* fl1_pae */
-#endif
- NULL, /* l2_pae */
- NULL, /* l2h_pae */
- NULL, /* l3_pae */
-#if CONFIG_PAGING_LEVELS >= 4
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,4,4), /* l1_64 */
- SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings,4,4), /* fl1_64 */
-#else
- NULL, /* l1_64 */
- NULL, /* fl1_64 */
-#endif
- NULL, /* l2_64 */
- NULL, /* l3_64 */
- NULL, /* l4_64 */
- NULL, /* p2m */
- NULL /* unused */
- };
-
- static unsigned int callback_mask =
- 1 << (PGC_SH2_l1_32_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_fl1_32_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_l1_pae_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_fl1_pae_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_l1_64_shadow >> PGC_SH2_type_shift)
- | 1 << (PGC_SH2_fl1_64_shadow >> PGC_SH2_type_shift)
- ;
-
- perfc_incrc(shadow2_mappings);
- if ( (page->count_info & PGC_count_mask) == 0 )
- return 0;
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
-
- /* XXX TODO:
- * Heuristics for finding the (probably) single mapping of this gmfn */
-
- /* Brute-force search of all the shadows, by walking the hash */
- perfc_incrc(shadow2_mappings_bf);
- hash_foreach(v, callback_mask, callbacks, gmfn);
-
- /* If that didn't catch the mapping, something is very wrong */
- expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
- if ( (page->count_info & PGC_count_mask) != expected_count )
- {
- /* Don't complain if we're in HVM and there's one extra mapping:
- * The qemu helper process has an untyped mapping of this dom's RAM */
- if ( !(shadow2_mode_external(v->domain)
- && (page->count_info & PGC_count_mask) <= 2
- && (page->u.inuse.type_info & PGT_count_mask) == 0) )
- {
- SHADOW2_ERROR("can't find all mappings of mfn %lx: "
- "c=%08x t=%08lx\n", mfn_x(gmfn),
- page->count_info, page->u.inuse.type_info);
- }
- }
-
- /* We killed at least one mapping, so must flush TLBs. */
- return 1;
-}
-
-
-/**************************************************************************/
-/* Remove all shadows of a guest frame from the shadow tables */
-
-static int sh2_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
-/* Follow this shadow's up-pointer, if it has one, and remove the reference
- * found there. Returns 1 if that was the only reference to this shadow */
-{
- struct page_info *pg = mfn_to_page(smfn);
- mfn_t pmfn;
- void *vaddr;
- int rc;
-
- ASSERT((pg->count_info & PGC_SH2_type_mask) > 0);
- ASSERT((pg->count_info & PGC_SH2_type_mask) < PGC_SH2_max_shadow);
- ASSERT((pg->count_info & PGC_SH2_type_mask) != PGC_SH2_l2_32_shadow);
- ASSERT((pg->count_info & PGC_SH2_type_mask) != PGC_SH2_l3_pae_shadow);
- ASSERT((pg->count_info & PGC_SH2_type_mask) != PGC_SH2_l4_64_shadow);
-
- if (pg->up == 0) return 0;
- pmfn = _mfn(pg->up >> PAGE_SHIFT);
- ASSERT(valid_mfn(pmfn));
- vaddr = sh2_map_domain_page(pmfn);
- ASSERT(vaddr);
- vaddr += pg->up & (PAGE_SIZE-1);
- ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
-
- /* Is this the only reference to this shadow? */
- rc = ((pg->count_info & PGC_SH2_count_mask) == 1) ? 1 : 0;
-
- /* Blank the offending entry */
- switch ((pg->count_info & PGC_SH2_type_mask))
- {
- case PGC_SH2_l1_32_shadow:
- case PGC_SH2_l2_32_shadow:
-#if CONFIG_PAGING_LEVELS == 2
- SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry,2,2)(v, vaddr, pmfn);
-#else
- SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry,3,2)(v, vaddr, pmfn);
-#endif
- break;
-#if CONFIG_PAGING_LEVELS >=3
- case PGC_SH2_l1_pae_shadow:
- case PGC_SH2_l2_pae_shadow:
- case PGC_SH2_l2h_pae_shadow:
- case PGC_SH2_l3_pae_shadow:
- SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry,3,3)(v, vaddr, pmfn);
- break;
-#if CONFIG_PAGING_LEVELS >= 4
- case PGC_SH2_l1_64_shadow:
- case PGC_SH2_l2_64_shadow:
- case PGC_SH2_l3_64_shadow:
- case PGC_SH2_l4_64_shadow:
- SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry,4,4)(v, vaddr, pmfn);
- break;
-#endif
-#endif
- default: BUG(); /* Some wierd unknown shadow type */
- }
-
- sh2_unmap_domain_page(vaddr);
- if ( rc )
- perfc_incrc(shadow2_up_pointer);
- else
- perfc_incrc(shadow2_unshadow_bf);
-
- return rc;
-}
-
-void sh2_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
-/* Remove the shadows of this guest page.
- * If all != 0, find all shadows, if necessary by walking the tables.
- * Otherwise, just try the (much faster) heuristics, which will remove
- * at most one reference to each shadow of the page. */
-{
- struct page_info *pg;
- mfn_t smfn;
- u32 sh_flags;
- unsigned char t;
-
- /* Dispatch table for getting per-type functions: each level must
- * be called with the function to remove a lower-level shadow. */
- static hash_callback_t callbacks[16] = {
- NULL, /* none */
- NULL, /* l1_32 */
- NULL, /* fl1_32 */
-#if CONFIG_PAGING_LEVELS == 2
- SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow,2,2), /* l2_32 */
-#else
- SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow,3,2), /* l2_32 */
-#endif
- NULL, /* l1_pae */
- NULL, /* fl1_pae */
-#if CONFIG_PAGING_LEVELS >= 3
- SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow,3,3), /* l2_pae */
- SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow,3,3), /* l2h_pae */
- SHADOW2_INTERNAL_NAME(sh2_remove_l2_shadow,3,3), /* l3_pae */
-#else
- NULL, /* l2_pae */
- NULL, /* l2h_pae */
- NULL, /* l3_pae */
-#endif
- NULL, /* l1_64 */
- NULL, /* fl1_64 */
-#if CONFIG_PAGING_LEVELS >= 4
- SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow,4,4), /* l2_64 */
- SHADOW2_INTERNAL_NAME(sh2_remove_l2_shadow,4,4), /* l3_64 */
- SHADOW2_INTERNAL_NAME(sh2_remove_l3_shadow,4,4), /* l4_64 */
-#else
- NULL, /* l2_64 */
- NULL, /* l3_64 */
- NULL, /* l4_64 */
-#endif
- NULL, /* p2m */
- NULL /* unused */
- };
-
- /* Another lookup table, for choosing which mask to use */
- static unsigned int masks[16] = {
- 0, /* none */
- 1 << (PGC_SH2_l2_32_shadow >> PGC_SH2_type_shift), /* l1_32 */
- 0, /* fl1_32 */
- 0, /* l2_32 */
- ((1 << (PGC_SH2_l2h_pae_shadow >> PGC_SH2_type_shift))
- | (1 << (PGC_SH2_l2_pae_shadow >> PGC_SH2_type_shift))), /* l1_pae */
- 0, /* fl1_pae */
- 1 << (PGC_SH2_l3_pae_shadow >> PGC_SH2_type_shift), /* l2_pae */
- 1 << (PGC_SH2_l3_pae_shadow >> PGC_SH2_type_shift), /* l2h_pae */
- 0, /* l3_pae */
- 1 << (PGC_SH2_l2_64_shadow >> PGC_SH2_type_shift), /* l1_64 */
- 0, /* fl1_64 */
- 1 << (PGC_SH2_l3_64_shadow >> PGC_SH2_type_shift), /* l2_64 */
- 1 << (PGC_SH2_l4_64_shadow >> PGC_SH2_type_shift), /* l3_64 */
- 0, /* l4_64 */
- 0, /* p2m */
- 0 /* unused */
- };
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
-
- pg = mfn_to_page(gmfn);
-
- /* Bale out now if the page is not shadowed */
- if ( (pg->count_info & PGC_page_table) == 0 )
- return;
-
- SHADOW2_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
- v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
-
- /* Search for this shadow in all appropriate shadows */
- perfc_incrc(shadow2_unshadow);
- sh_flags = pg->shadow2_flags;
-
- /* Lower-level shadows need to be excised from upper-level shadows.
- * This call to hash_foreach() looks dangerous but is in fact OK: each
- * call will remove at most one shadow, and terminate immediately when
- * it does remove it, so we never walk the hash after doing a deletion. */
-#define DO_UNSHADOW(_type) do { \
- t = (_type) >> PGC_SH2_type_shift; \
- smfn = shadow2_hash_lookup(v, mfn_x(gmfn), t); \
- if ( !sh2_remove_shadow_via_pointer(v, smfn) && all ) \
- hash_foreach(v, masks[t], callbacks, smfn); \
-} while (0)
-
- /* Top-level shadows need to be unpinned */
-#define DO_UNPIN(_type) do { \
- t = (_type) >> PGC_SH2_type_shift; \
- smfn = shadow2_hash_lookup(v, mfn_x(gmfn), t); \
- if ( mfn_to_page(smfn)->count_info & PGC_SH2_pinned ) \
- sh2_unpin(v, smfn); \
- if ( (_type) == PGC_SH2_l3_pae_shadow ) \
- SHADOW2_INTERNAL_NAME(sh2_unpin_all_l3_subshadows,3,3)(v, smfn); \
-} while (0)
-
- if ( sh_flags & SH2F_L1_32 ) DO_UNSHADOW(PGC_SH2_l1_32_shadow);
- if ( sh_flags & SH2F_L2_32 ) DO_UNPIN(PGC_SH2_l2_32_shadow);
-#if CONFIG_PAGING_LEVELS >= 3
- if ( sh_flags & SH2F_L1_PAE ) DO_UNSHADOW(PGC_SH2_l1_pae_shadow);
- if ( sh_flags & SH2F_L2_PAE ) DO_UNSHADOW(PGC_SH2_l2_pae_shadow);
- if ( sh_flags & SH2F_L2H_PAE ) DO_UNSHADOW(PGC_SH2_l2h_pae_shadow);
- if ( sh_flags & SH2F_L3_PAE ) DO_UNPIN(PGC_SH2_l3_pae_shadow);
-#if CONFIG_PAGING_LEVELS >= 4
- if ( sh_flags & SH2F_L1_64 ) DO_UNSHADOW(PGC_SH2_l1_64_shadow);
- if ( sh_flags & SH2F_L2_64 ) DO_UNSHADOW(PGC_SH2_l2_64_shadow);
- if ( sh_flags & SH2F_L3_64 ) DO_UNSHADOW(PGC_SH2_l3_64_shadow);
- if ( sh_flags & SH2F_L4_64 ) DO_UNPIN(PGC_SH2_l4_64_shadow);
-#endif
-#endif
-
-#undef DO_UNSHADOW
-#undef DO_UNPIN
-
-
-#if CONFIG_PAGING_LEVELS > 2
- /* We may have caused some PAE l3 entries to change: need to
- * fix up the copies of them in various places */
- if ( sh_flags & (SH2F_L2_PAE|SH2F_L2H_PAE) )
- sh2_pae_recopy(v->domain);
-#endif
-
- /* If that didn't catch the shadows, something is wrong */
- if ( all && (pg->count_info & PGC_page_table) )
- {
- SHADOW2_ERROR("can't find all shadows of mfn %05lx (shadow2_flags=%08x)\n",
- mfn_x(gmfn), pg->shadow2_flags);
- domain_crash(v->domain);
- }
-}
-
-void
-shadow2_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
-/* Even harsher: this is a HVM page that we thing is no longer a pagetable.
- * Unshadow it, and recursively unshadow pages that reference it. */
-{
- shadow2_remove_all_shadows(v, gmfn);
- /* XXX TODO:
- * Rework this hashtable walker to return a linked-list of all
- * the shadows it modified, then do breadth-first recursion
- * to find the way up to higher-level tables and unshadow them too.
- *
- * The current code (just tearing down each page's shadows as we
- * detect that it is not a pagetable) is correct, but very slow.
- * It means extra emulated writes and slows down removal of mappings. */
-}
-
-/**************************************************************************/
-
-void sh2_update_paging_modes(struct vcpu *v)
-{
- struct domain *d = v->domain;
- struct shadow2_paging_mode *old_mode = v->arch.shadow2.mode;
- mfn_t old_guest_table;
-
- ASSERT(shadow2_lock_is_acquired(d));
-
- // Valid transitions handled by this function:
- // - For PV guests:
- // - after a shadow mode has been changed
- // - For HVM guests:
- // - after a shadow mode has been changed
- // - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
- //
-
- // Avoid determining the current shadow2 mode for uninitialized CPUs, as
- // we can not yet determine whether it is an HVM or PV domain.
- //
- if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
- {
- printk("%s: postponing determination of shadow2 mode\n", __func__);
- return;
- }
-
- // First, tear down any old shadow tables held by this vcpu.
- //
- shadow2_detach_old_tables(v);
-
- if ( !hvm_guest(v) )
- {
- ///
- /// PV guest
- ///
-#if CONFIG_PAGING_LEVELS == 4
- if ( pv_32bit_guest(v) )
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,4,3);
- else
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,4,4);
-#elif CONFIG_PAGING_LEVELS == 3
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,3,3);
-#elif CONFIG_PAGING_LEVELS == 2
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,2,2);
-#else
-#error unexpected paging mode
-#endif
- }
- else
- {
- ///
- /// HVM guest
- ///
- ASSERT(shadow2_mode_translate(d));
- ASSERT(shadow2_mode_external(d));
-
- v->arch.shadow2.hvm_paging_enabled = !!hvm_paging_enabled(v);
- if ( !v->arch.shadow2.hvm_paging_enabled )
- {
-
- /* Set v->arch.guest_table to use the p2m map, and choose
- * the appropriate shadow mode */
- old_guest_table = pagetable_get_mfn(v->arch.guest_table);
-#if CONFIG_PAGING_LEVELS == 2
- v->arch.guest_table =
- pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,2,2);
-#elif CONFIG_PAGING_LEVELS == 3
- v->arch.guest_table =
- pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,3,3);
-#else /* CONFIG_PAGING_LEVELS == 4 */
- {
- l4_pgentry_t *l4e;
- /* Use the start of the first l3 table as a PAE l3 */
- ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
- l4e = sh2_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
- ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
- v->arch.guest_table =
- pagetable_from_pfn(l4e_get_pfn(l4e[0]));
- sh2_unmap_domain_page(l4e);
- }
- v->arch.shadow2.mode = &SHADOW2_INTERNAL_NAME(sh2_paging_mode,3,3);
-#endif
- /* Fix up refcounts on guest_table */
- get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
- if ( mfn_x(old_guest_table) != 0 )
- put_page(mfn_to_page(old_guest_table));
- }
- else
- {
-#ifdef __x86_64__
- if ( hvm_long_mode_enabled(v) )
- {
- // long mode guest...
- v->arch.shadow2.mode =
- &SHADOW2_INTERNAL_NAME(sh2_paging_mode, 4, 4);
- }
- else
-#endif
- if ( hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PAE )
- {
-#if CONFIG_PAGING_LEVELS >= 3
- // 32-bit PAE mode guest...
- v->arch.shadow2.mode =
- &SHADOW2_INTERNAL_NAME(sh2_paging_mode, 3, 3);
-#else
- SHADOW2_ERROR("PAE not supported in 32-bit Xen\n");
- domain_crash(d);
- return;
-#endif
- }
- else
- {
- // 32-bit 2 level guest...
-#if CONFIG_PAGING_LEVELS >= 3
- v->arch.shadow2.mode =
- &SHADOW2_INTERNAL_NAME(sh2_paging_mode, 3, 2);
-#else
- v->arch.shadow2.mode =
- &SHADOW2_INTERNAL_NAME(sh2_paging_mode, 2, 2);
-#endif
- }
- }
-
- if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
- {
- mfn_t mmfn = shadow2_make_monitor_table(v);
- v->arch.monitor_table = pagetable_from_mfn(mmfn);
- v->arch.monitor_vtable = sh2_map_domain_page(mmfn);
- }
-
- if ( v->arch.shadow2.mode != old_mode )
- {
- SHADOW2_PRINTK("new paging mode: d=%u v=%u g=%u s=%u "
- "(was g=%u s=%u)\n",
- d->domain_id, v->vcpu_id,
- v->arch.shadow2.mode->guest_levels,
- v->arch.shadow2.mode->shadow_levels,
- old_mode ? old_mode->guest_levels : 0,
- old_mode ? old_mode->shadow_levels : 0);
- if ( old_mode &&
- (v->arch.shadow2.mode->shadow_levels !=
- old_mode->shadow_levels) )
- {
- /* Need to make a new monitor table for the new mode */
- mfn_t new_mfn, old_mfn;
-
- if ( v != current )
- {
- SHADOW2_ERROR("Some third party (d=%u v=%u) is changing "
- "this HVM vcpu's (d=%u v=%u) paging mode!\n",
- current->domain->domain_id, current->vcpu_id,
- v->domain->domain_id, v->vcpu_id);
- domain_crash(v->domain);
- return;
- }
-
- sh2_unmap_domain_page(v->arch.monitor_vtable);
- old_mfn = pagetable_get_mfn(v->arch.monitor_table);
- v->arch.monitor_table = pagetable_null();
- new_mfn = v->arch.shadow2.mode->make_monitor_table(v);
- v->arch.monitor_table = pagetable_from_mfn(new_mfn);
- v->arch.monitor_vtable = sh2_map_domain_page(new_mfn);
- SHADOW2_PRINTK("new monitor table %"SH2_PRI_mfn "\n",
- mfn_x(new_mfn));
-
- /* Don't be running on the old monitor table when we
- * pull it down! Switch CR3, and warn the HVM code that
- * its host cr3 has changed. */
- make_cr3(v, mfn_x(new_mfn));
- write_ptbase(v);
- hvm_update_host_cr3(v);
- old_mode->destroy_monitor_table(v, old_mfn);
- }
- }
-
- // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
- // These are HARD: think about the case where two CPU's have
- // different values for CR4.PSE and CR4.PGE at the same time.
- // This *does* happen, at least for CR4.PGE...
- }
-
- v->arch.shadow2.mode->update_cr3(v);
-}
-
-/**************************************************************************/
-/* Turning on and off shadow2 features */
-
-static void sh2_new_mode(struct domain *d, u32 new_mode)
-/* Inform all the vcpus that the shadow mode has been changed */
-{
- struct vcpu *v;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(d != current->domain);
- d->arch.shadow2.mode = new_mode;
- if ( new_mode & SHM2_translate )
- shadow2_audit_p2m(d);
- for_each_vcpu(d, v)
- sh2_update_paging_modes(v);
-}
-
-static int shadow2_enable(struct domain *d, u32 mode)
-/* Turn on "permanent" shadow features: external, translate, refcount.
- * Can only be called once on a domain, and these features cannot be
- * disabled.
- * Returns 0 for success, -errno for failure. */
-{
- unsigned int old_pages;
- int rv = 0;
-
- mode |= SHM2_enable;
-
- domain_pause(d);
- shadow2_lock(d);
-
- /* Sanity check the arguments */
- if ( (d == current->domain) ||
- shadow2_mode_enabled(d) ||
- ((mode & SHM2_external) && !(mode & SHM2_translate)) )
- {
- rv = -EINVAL;
- goto out;
- }
-
- // XXX -- eventually would like to require that all memory be allocated
- // *after* shadow2_enabled() is called... So here, we would test to make
- // sure that d->page_list is empty.
-#if 0
- spin_lock(&d->page_alloc_lock);
- if ( !list_empty(&d->page_list) )
- {
- spin_unlock(&d->page_alloc_lock);
- rv = -EINVAL;
- goto out;
- }
- spin_unlock(&d->page_alloc_lock);
-#endif
-
- /* Init the shadow memory allocation if the user hasn't done so */
- old_pages = d->arch.shadow2.total_pages;
- if ( old_pages == 0 )
- if ( set_sh2_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
- {
- set_sh2_allocation(d, 0, NULL);
- rv = -ENOMEM;
- goto out;
- }
-
- /* Init the hash table */
- if ( shadow2_hash_alloc(d) != 0 )
- {
- set_sh2_allocation(d, old_pages, NULL);
- rv = -ENOMEM;
- goto out;
- }
-
- /* Init the P2M table */
- if ( mode & SHM2_translate )
- if ( !shadow2_alloc_p2m_table(d) )
- {
- shadow2_hash_teardown(d);
- set_sh2_allocation(d, old_pages, NULL);
- shadow2_p2m_teardown(d);
- rv = -ENOMEM;
- goto out;
- }
-
- /* Update the bits */
- sh2_new_mode(d, mode);
- shadow2_audit_p2m(d);
- out:
- shadow2_unlock(d);
- domain_unpause(d);
- return 0;
-}
-
-void shadow2_teardown(struct domain *d)
-/* Destroy the shadow pagetables of this domain and free its shadow memory.
- * Should only be called for dying domains. */
-{
- struct vcpu *v;
- mfn_t mfn;
-
- ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
- ASSERT(d != current->domain);
-
- if ( !shadow2_lock_is_acquired(d) )
- shadow2_lock(d); /* Keep various asserts happy */
-
- if ( shadow2_mode_enabled(d) )
- {
- /* Release the shadow and monitor tables held by each vcpu */
- for_each_vcpu(d, v)
- {
- shadow2_detach_old_tables(v);
- if ( shadow2_mode_external(d) )
- {
- mfn = pagetable_get_mfn(v->arch.monitor_table);
- if ( valid_mfn(mfn) && (mfn_x(mfn) != 0) )
- shadow2_destroy_monitor_table(v, mfn);
- v->arch.monitor_table = pagetable_null();
- }
- }
- }
-
- if ( d->arch.shadow2.total_pages != 0 )
- {
- SHADOW2_PRINTK("teardown of domain %u starts."
- " Shadow pages total = %u, free = %u, p2m=%u\n",
- d->domain_id,
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
- /* Destroy all the shadows and release memory to domheap */
- set_sh2_allocation(d, 0, NULL);
- /* Release the hash table back to xenheap */
- if (d->arch.shadow2.hash_table)
- shadow2_hash_teardown(d);
- /* Release the log-dirty bitmap of dirtied pages */
- sh2_free_log_dirty_bitmap(d);
- /* Should not have any more memory held */
- SHADOW2_PRINTK("teardown done."
- " Shadow pages total = %u, free = %u, p2m=%u\n",
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
- ASSERT(d->arch.shadow2.total_pages == 0);
- }
-
- /* We leave the "permanent" shadow modes enabled, but clear the
- * log-dirty mode bit. We don't want any more mark_dirty()
- * calls now that we've torn down the bitmap */
- d->arch.shadow2.mode &= ~SHM2_log_dirty;
-
- shadow2_unlock(d);
-}
-
-void shadow2_final_teardown(struct domain *d)
-/* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */
-{
-
- SHADOW2_PRINTK("dom %u final teardown starts."
- " Shadow pages total = %u, free = %u, p2m=%u\n",
- d->domain_id,
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
-
- /* Double-check that the domain didn't have any shadow memory.
- * It is possible for a domain that never got domain_kill()ed
- * to get here with its shadow allocation intact. */
- if ( d->arch.shadow2.total_pages != 0 )
- shadow2_teardown(d);
-
- /* It is now safe to pull down the p2m map. */
- if ( d->arch.shadow2.p2m_pages != 0 )
- shadow2_p2m_teardown(d);
-
- SHADOW2_PRINTK("dom %u final teardown done."
- " Shadow pages total = %u, free = %u, p2m=%u\n",
- d->domain_id,
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
-}
-
-static int shadow2_one_bit_enable(struct domain *d, u32 mode)
-/* Turn on a single shadow mode feature */
-{
- ASSERT(shadow2_lock_is_acquired(d));
-
- /* Sanity check the call */
- if ( d == current->domain || (d->arch.shadow2.mode & mode) )
- {
- return -EINVAL;
- }
-
- if ( d->arch.shadow2.mode == 0 )
- {
- /* Init the shadow memory allocation and the hash table */
- if ( set_sh2_allocation(d, 1, NULL) != 0
- || shadow2_hash_alloc(d) != 0 )
- {
- set_sh2_allocation(d, 0, NULL);
- return -ENOMEM;
- }
- }
-
- /* Update the bits */
- sh2_new_mode(d, d->arch.shadow2.mode | mode);
-
- return 0;
-}
-
-static int shadow2_one_bit_disable(struct domain *d, u32 mode)
-/* Turn off a single shadow mode feature */
-{
- struct vcpu *v;
- ASSERT(shadow2_lock_is_acquired(d));
-
- /* Sanity check the call */
- if ( d == current->domain || !(d->arch.shadow2.mode & mode) )
- {
- return -EINVAL;
- }
-
- /* Update the bits */
- sh2_new_mode(d, d->arch.shadow2.mode & ~mode);
- if ( d->arch.shadow2.mode == 0 )
- {
- /* Get this domain off shadows */
- SHADOW2_PRINTK("un-shadowing of domain %u starts."
- " Shadow pages total = %u, free = %u, p2m=%u\n",
- d->domain_id,
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
- for_each_vcpu(d, v)
- {
- shadow2_detach_old_tables(v);
-#if CONFIG_PAGING_LEVELS == 4
- if ( !(v->arch.flags & TF_kernel_mode) )
- make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
- else
-#endif
- make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
-
- }
-
- /* Pull down the memory allocation */
- if ( set_sh2_allocation(d, 0, NULL) != 0 )
- {
- // XXX - How can this occur?
- // Seems like a bug to return an error now that we've
- // disabled the relevant shadow mode.
- //
- return -ENOMEM;
- }
- shadow2_hash_teardown(d);
- SHADOW2_PRINTK("un-shadowing of domain %u done."
- " Shadow pages total = %u, free = %u, p2m=%u\n",
- d->domain_id,
- d->arch.shadow2.total_pages,
- d->arch.shadow2.free_pages,
- d->arch.shadow2.p2m_pages);
- }
-
- return 0;
-}
-
-/* Enable/disable ops for the "test" and "log-dirty" modes */
-int shadow2_test_enable(struct domain *d)
-{
- int ret;
-
- domain_pause(d);
- shadow2_lock(d);
-
- if ( shadow2_mode_enabled(d) )
- {
- SHADOW2_ERROR("Don't support enabling test mode"
- "on already shadowed doms\n");
- ret = -EINVAL;
- goto out;
- }
-
- ret = shadow2_one_bit_enable(d, SHM2_enable);
- out:
- shadow2_unlock(d);
- domain_unpause(d);
-
- return ret;
-}
-
-int shadow2_test_disable(struct domain *d)
-{
- int ret;
-
- domain_pause(d);
- shadow2_lock(d);
- ret = shadow2_one_bit_disable(d, SHM2_enable);
- shadow2_unlock(d);
- domain_unpause(d);
-
- return ret;
-}
-
-static int
-sh2_alloc_log_dirty_bitmap(struct domain *d)
-{
- ASSERT(d->arch.shadow2.dirty_bitmap == NULL);
- d->arch.shadow2.dirty_bitmap_size =
- (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) &
- ~(BITS_PER_LONG - 1);
- d->arch.shadow2.dirty_bitmap =
- xmalloc_array(unsigned long,
- d->arch.shadow2.dirty_bitmap_size / BITS_PER_LONG);
- if ( d->arch.shadow2.dirty_bitmap == NULL )
- {
- d->arch.shadow2.dirty_bitmap_size = 0;
- return -ENOMEM;
- }
- memset(d->arch.shadow2.dirty_bitmap, 0, d->arch.shadow2.dirty_bitmap_size/8);
-
- return 0;
-}
-
-static void
-sh2_free_log_dirty_bitmap(struct domain *d)
-{
- d->arch.shadow2.dirty_bitmap_size = 0;
- if ( d->arch.shadow2.dirty_bitmap )
- {
- xfree(d->arch.shadow2.dirty_bitmap);
- d->arch.shadow2.dirty_bitmap = NULL;
- }
-}
-
-static int shadow2_log_dirty_enable(struct domain *d)
-{
- int ret;
-
- domain_pause(d);
- shadow2_lock(d);
-
- if ( shadow2_mode_log_dirty(d) )
- {
- ret = -EINVAL;
- goto out;
- }
-
- if ( shadow2_mode_enabled(d) )
- {
- SHADOW2_ERROR("Don't (yet) support enabling log-dirty"
- "on already shadowed doms\n");
- ret = -EINVAL;
- goto out;
- }
-
- ret = sh2_alloc_log_dirty_bitmap(d);
- if ( ret != 0 )
- {
- sh2_free_log_dirty_bitmap(d);
- goto out;
- }
-
- ret = shadow2_one_bit_enable(d, SHM2_log_dirty);
- if ( ret != 0 )
- sh2_free_log_dirty_bitmap(d);
-
- out:
- shadow2_unlock(d);
- domain_unpause(d);
- return ret;
-}
-
-static int shadow2_log_dirty_disable(struct domain *d)
-{
- int ret;
-
- domain_pause(d);
- shadow2_lock(d);
- ret = shadow2_one_bit_disable(d, SHM2_log_dirty);
- if ( !shadow2_mode_log_dirty(d) )
- sh2_free_log_dirty_bitmap(d);
- shadow2_unlock(d);
- domain_unpause(d);
-
- return ret;
-}
-
-/**************************************************************************/
-/* P2M map manipulations */
-
-static void
-sh2_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
- struct vcpu *v;
-
- if ( !shadow2_mode_translate(d) )
- return;
-
- v = current;
- if ( v->domain != d )
- v = d->vcpu[0];
-
-
- SHADOW2_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
-
- ASSERT(mfn_x(sh2_gfn_to_mfn(d, gfn)) == mfn);
- //ASSERT(sh2_mfn_to_gfn(d, mfn) == gfn);
-
- shadow2_remove_all_shadows_and_parents(v, _mfn(mfn));
- if ( shadow2_remove_all_mappings(v, _mfn(mfn)) )
- flush_tlb_mask(d->domain_dirty_cpumask);
- shadow2_set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
-}
-
-void
-shadow2_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- shadow2_lock(d);
- shadow2_audit_p2m(d);
- sh2_p2m_remove_page(d, gfn, mfn);
- shadow2_audit_p2m(d);
- shadow2_unlock(d);
-}
-
-void
-shadow2_guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- struct vcpu *v;
- unsigned long ogfn;
- mfn_t omfn;
-
- if ( !shadow2_mode_translate(d) )
- return;
-
- v = current;
- if ( v->domain != d )
- v = d->vcpu[0];
-
- shadow2_lock(d);
- shadow2_audit_p2m(d);
-
- SHADOW2_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
-
- omfn = sh2_gfn_to_mfn(d, gfn);
- if ( valid_mfn(omfn) )
- {
- /* Get rid of the old mapping, especially any shadows */
- shadow2_remove_all_shadows_and_parents(v, omfn);
- if ( shadow2_remove_all_mappings(v, omfn) )
- flush_tlb_mask(d->domain_dirty_cpumask);
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
- }
-
- ogfn = sh2_mfn_to_gfn(d, _mfn(mfn));
- if (
-#ifdef __x86_64__
- (ogfn != 0x5555555555555555L)
-#else
- (ogfn != 0x55555555L)
-#endif
- && (ogfn != INVALID_M2P_ENTRY)
- && (ogfn != gfn) )
- {
- /* This machine frame is already mapped at another physical address */
- SHADOW2_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
- mfn, ogfn, gfn);
- if ( valid_mfn(omfn = sh2_gfn_to_mfn(d, ogfn)) )
- {
- SHADOW2_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n",
- ogfn , mfn_x(omfn));
- if ( mfn_x(omfn) == mfn )
- sh2_p2m_remove_page(d, ogfn, mfn);
- }
- }
-
- shadow2_set_p2m_entry(d, gfn, _mfn(mfn));
- set_gpfn_from_mfn(mfn, gfn);
- shadow2_audit_p2m(d);
- shadow2_unlock(d);
-}
-
-/**************************************************************************/
-/* Log-dirty mode support */
-
-/* Convert a shadow to log-dirty mode. */
-void shadow2_convert_to_log_dirty(struct vcpu *v, mfn_t smfn)
-{
- BUG();
-}
-
-
-/* Read a domain's log-dirty bitmap and stats.
- * If the operation is a CLEAN, clear the bitmap and stats as well. */
-static int shadow2_log_dirty_op(
- struct domain *d, struct xen_domctl_shadow_op *sc)
-{
- int i, rv = 0, clean = 0;
-
- domain_pause(d);
- shadow2_lock(d);
-
- clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
-
- SHADOW2_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
- (clean) ? "clean" : "peek",
- d->domain_id,
- d->arch.shadow2.fault_count,
- d->arch.shadow2.dirty_count);
-
- sc->stats.fault_count = d->arch.shadow2.fault_count;
- sc->stats.dirty_count = d->arch.shadow2.dirty_count;
-
- if ( clean )
- {
- struct list_head *l, *t;
- struct page_info *pg;
-
- /* Need to revoke write access to the domain's pages again.
- * In future, we'll have a less heavy-handed approach to this,
- * but for now, we just unshadow everything except Xen. */
- list_for_each_safe(l, t, &d->arch.shadow2.toplevel_shadows)
- {
- pg = list_entry(l, struct page_info, list);
- shadow2_unhook_mappings(d->vcpu[0], page_to_mfn(pg));
- }
-
- d->arch.shadow2.fault_count = 0;
- d->arch.shadow2.dirty_count = 0;
- }
-
- if ( guest_handle_is_null(sc->dirty_bitmap) ||
- (d->arch.shadow2.dirty_bitmap == NULL) )
- {
- rv = -EINVAL;
- goto out;
- }
-
- if ( sc->pages > d->arch.shadow2.dirty_bitmap_size )
- sc->pages = d->arch.shadow2.dirty_bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
- for ( i = 0; i < sc->pages; i += CHUNK )
- {
- int bytes = ((((sc->pages - i) > CHUNK)
- ? CHUNK
- : (sc->pages - i)) + 7) / 8;
-
- if ( copy_to_guest_offset(
- sc->dirty_bitmap,
- i/(8*sizeof(unsigned long)),
- d->arch.shadow2.dirty_bitmap + (i/(8*sizeof(unsigned long))),
- (bytes + sizeof(unsigned long) - 1) / sizeof(unsigned long)) )
- {
- rv = -EINVAL;
- goto out;
- }
-
- if ( clean )
- memset(d->arch.shadow2.dirty_bitmap + (i/(8*sizeof(unsigned long))),
- 0, bytes);
- }
-#undef CHUNK
-
- out:
- shadow2_unlock(d);
- domain_unpause(d);
- return 0;
-}
-
-
-/* Mark a page as dirty */
-void sh2_do_mark_dirty(struct domain *d, mfn_t gmfn)
-{
- unsigned long pfn;
-
- ASSERT(shadow2_lock_is_acquired(d));
- ASSERT(shadow2_mode_log_dirty(d));
-
- if ( !valid_mfn(gmfn) )
- return;
-
- ASSERT(d->arch.shadow2.dirty_bitmap != NULL);
-
- /* We /really/ mean PFN here, even for non-translated guests. */
- pfn = get_gpfn_from_mfn(mfn_x(gmfn));
-
- /*
- * Values with the MSB set denote MFNs that aren't really part of the
- * domain's pseudo-physical memory map (e.g., the shared info frame).
- * Nothing to do here...
- */
- if ( unlikely(!VALID_M2P(pfn)) )
- return;
-
- /* N.B. Can use non-atomic TAS because protected by shadow2_lock. */
- if ( likely(pfn < d->arch.shadow2.dirty_bitmap_size) )
- {
- if ( !__test_and_set_bit(pfn, d->arch.shadow2.dirty_bitmap) )
- {
- SHADOW2_DEBUG(LOGDIRTY,
- "marked mfn %" SH2_PRI_mfn " (pfn=%lx), dom %d\n",
- mfn_x(gmfn), pfn, d->domain_id);
- d->arch.shadow2.dirty_count++;
- }
- }
- else
- {
- SHADOW2_PRINTK("mark_dirty OOR! "
- "mfn=%" SH2_PRI_mfn " pfn=%lx max=%x (dom %d)\n"
- "owner=%d c=%08x t=%" PRtype_info "\n",
- mfn_x(gmfn),
- pfn,
- d->arch.shadow2.dirty_bitmap_size,
- d->domain_id,
- (page_get_owner(mfn_to_page(gmfn))
- ? page_get_owner(mfn_to_page(gmfn))->domain_id
- : -1),
- mfn_to_page(gmfn)->count_info,
- mfn_to_page(gmfn)->u.inuse.type_info);
- }
-}
-
-
-/**************************************************************************/
-/* Shadow-control XEN_DOMCTL dispatcher */
-
-int shadow2_domctl(struct domain *d,
- xen_domctl_shadow_op_t *sc,
- XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
-{
- int rc, preempted = 0;
-
- if ( unlikely(d == current->domain) )
- {
- DPRINTK("Don't try to do a shadow op on yourself!\n");
- return -EINVAL;
- }
-
- switch ( sc->op )
- {
- case XEN_DOMCTL_SHADOW_OP_OFF:
- if ( shadow2_mode_log_dirty(d) )
- if ( (rc = shadow2_log_dirty_disable(d)) != 0 )
- return rc;
- if ( d->arch.shadow2.mode & SHM2_enable )
- if ( (rc = shadow2_test_disable(d)) != 0 )
- return rc;
- return 0;
-
- case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
- return shadow2_test_enable(d);
-
- case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
- return shadow2_log_dirty_enable(d);
-
- case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
- return shadow2_enable(d, SHM2_refcounts|SHM2_translate);
-
- case XEN_DOMCTL_SHADOW_OP_CLEAN:
- case XEN_DOMCTL_SHADOW_OP_PEEK:
- return shadow2_log_dirty_op(d, sc);
-
- case XEN_DOMCTL_SHADOW_OP_ENABLE:
- if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
- return shadow2_log_dirty_enable(d);
- return shadow2_enable(d, sc->mode << SHM2_shift);
-
- case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
- sc->mb = shadow2_get_allocation(d);
- return 0;
-
- case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
- rc = shadow2_set_allocation(d, sc->mb, &preempted);
- if ( preempted )
- /* Not finished. Set up to re-run the call. */
- rc = hypercall_create_continuation(
- __HYPERVISOR_domctl, "h", u_domctl);
- else
- /* Finished. Return the new allocation */
- sc->mb = shadow2_get_allocation(d);
- return rc;
-
- default:
- SHADOW2_ERROR("Bad shadow op %u\n", sc->op);
- return -EINVAL;
- }
-}
-
-
-/**************************************************************************/
-/* Auditing shadow tables */
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_FULL
-
-void shadow2_audit_tables(struct vcpu *v)
-{
- /* Dispatch table for getting per-type functions */
- static hash_callback_t callbacks[16] = {
- NULL, /* none */
-#if CONFIG_PAGING_LEVELS == 2
- SHADOW2_INTERNAL_NAME(sh2_audit_l1_table,2,2), /* l1_32 */
- SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table,2,2), /* fl1_32 */
- SHADOW2_INTERNAL_NAME(sh2_audit_l2_table,2,2), /* l2_32 */
-#else
- SHADOW2_INTERNAL_NAME(sh2_audit_l1_table,3,2), /* l1_32 */
- SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table,3,2), /* fl1_32 */
- SHADOW2_INTERNAL_NAME(sh2_audit_l2_table,3,2), /* l2_32 */
- SHADOW2_INTERNAL_NAME(sh2_audit_l1_table,3,3), /* l1_pae */
- SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table,3,3), /* fl1_pae */
- SHADOW2_INTERNAL_NAME(sh2_audit_l2_table,3,3), /* l2_pae */
- SHADOW2_INTERNAL_NAME(sh2_audit_l2_table,3,3), /* l2h_pae */
- SHADOW2_INTERNAL_NAME(sh2_audit_l3_table,3,3), /* l3_pae */
-#if CONFIG_PAGING_LEVELS >= 4
- SHADOW2_INTERNAL_NAME(sh2_audit_l1_table,4,4), /* l1_64 */
- SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table,4,4), /* fl1_64 */
- SHADOW2_INTERNAL_NAME(sh2_audit_l2_table,4,4), /* l2_64 */
- SHADOW2_INTERNAL_NAME(sh2_audit_l3_table,4,4), /* l3_64 */
- SHADOW2_INTERNAL_NAME(sh2_audit_l4_table,4,4), /* l4_64 */
-#endif /* CONFIG_PAGING_LEVELS >= 4 */
-#endif /* CONFIG_PAGING_LEVELS > 2 */
- NULL /* All the rest */
- };
- unsigned int mask;
-
- if ( !(SHADOW2_AUDIT_ENABLE) )
- return;
-
- if ( SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_FULL )
- mask = ~1; /* Audit every table in the system */
- else
- {
- /* Audit only the current mode's tables */
- switch ( v->arch.shadow2.mode->guest_levels )
- {
- case 2: mask = (SH2F_L1_32|SH2F_FL1_32|SH2F_L2_32); break;
- case 3: mask = (SH2F_L1_PAE|SH2F_FL1_PAE|SH2F_L2_PAE
- |SH2F_L2H_PAE|SH2F_L3_PAE); break;
- case 4: mask = (SH2F_L1_64|SH2F_FL1_64|SH2F_L2_64
- |SH2F_L3_64|SH2F_L4_64); break;
- default: BUG();
- }
- }
-
- hash_foreach(v, ~1, callbacks, _mfn(INVALID_MFN));
-}
-
-#endif /* Shadow audit */
-
-
-/**************************************************************************/
-/* Auditing p2m tables */
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_P2M
-
-void shadow2_audit_p2m(struct domain *d)
-{
- struct list_head *entry;
- struct page_info *page;
- struct domain *od;
- unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
- mfn_t p2mfn;
- unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
- int test_linear;
-
- if ( !(SHADOW2_AUDIT_ENABLE) || !shadow2_mode_translate(d) )
- return;
-
- //SHADOW2_PRINTK("p2m audit starts\n");
-
- test_linear = ( (d == current->domain) && current->arch.monitor_vtable );
- if ( test_linear )
- local_flush_tlb();
-
- /* Audit part one: walk the domain's page allocation list, checking
- * the m2p entries. */
- for ( entry = d->page_list.next;
- entry != &d->page_list;
- entry = entry->next )
- {
- page = list_entry(entry, struct page_info, list);
- mfn = mfn_x(page_to_mfn(page));
-
- // SHADOW2_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
-
- od = page_get_owner(page);
-
- if ( od != d )
- {
- SHADOW2_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
- mfn, od, (od?od->domain_id:-1), d, d->domain_id);
- continue;
- }
-
- gfn = get_gpfn_from_mfn(mfn);
- if ( gfn == INVALID_M2P_ENTRY )
- {
- orphans_i++;
- //SHADOW2_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
- // mfn);
- continue;
- }
-
- if ( gfn == 0x55555555 )
- {
- orphans_d++;
- //SHADOW2_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
- // mfn);
- continue;
- }
-
- p2mfn = sh2_gfn_to_mfn_foreign(d, gfn);
- if ( mfn_x(p2mfn) != mfn )
- {
- mpbad++;
- SHADOW2_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
- " (-> gfn %#lx)\n",
- mfn, gfn, mfn_x(p2mfn),
- (mfn_valid(p2mfn)
- ? get_gpfn_from_mfn(mfn_x(p2mfn))
- : -1u));
- /* This m2p entry is stale: the domain has another frame in
- * this physical slot. No great disaster, but for neatness,
- * blow away the m2p entry. */
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
- }
-
- if ( test_linear )
- {
- lp2mfn = get_mfn_from_gpfn(gfn);
- if ( lp2mfn != mfn_x(p2mfn) )
- {
- SHADOW2_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
- "(!= mfn %#lx)\n", gfn, lp2mfn, p2mfn);
- }
- }
-
- // SHADOW2_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
- // mfn, gfn, p2mfn, lp2mfn);
- }
-
- /* Audit part two: walk the domain's p2m table, checking the entries. */
- if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
- {
- l2_pgentry_t *l2e;
- l1_pgentry_t *l1e;
- int i1, i2;
-
-#if CONFIG_PAGING_LEVELS == 4
- l4_pgentry_t *l4e;
- l3_pgentry_t *l3e;
- int i3, i4;
- l4e = sh2_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
-#elif CONFIG_PAGING_LEVELS == 3
- l3_pgentry_t *l3e;
- int i3;
- l3e = sh2_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
-#else /* CONFIG_PAGING_LEVELS == 2 */
- l2e = sh2_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
-#endif
-
- gfn = 0;
-#if CONFIG_PAGING_LEVELS >= 3
-#if CONFIG_PAGING_LEVELS >= 4
- for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
- {
- if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
- {
- gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
- continue;
- }
- l3e = sh2_map_domain_page(_mfn(l4e_get_pfn(l4e[i4])));
-#endif /* now at levels 3 or 4... */
- for ( i3 = 0;
- i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
- i3++ )
- {
- if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
- {
- gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
- continue;
- }
- l2e = sh2_map_domain_page(_mfn(l3e_get_pfn(l3e[i3])));
-#endif /* all levels... */
- for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
- {
- gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
- continue;
- }
- l1e = sh2_map_domain_page(_mfn(l2e_get_pfn(l2e[i2])));
-
- for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
- {
- if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
- continue;
- mfn = l1e_get_pfn(l1e[i1]);
- ASSERT(valid_mfn(_mfn(mfn)));
- m2pfn = get_gpfn_from_mfn(mfn);
- if ( m2pfn != gfn )
- {
- pmbad++;
- SHADOW2_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
- " -> gfn %#lx\n", gfn, mfn, m2pfn);
- BUG();
- }
- }
- sh2_unmap_domain_page(l1e);
- }
-#if CONFIG_PAGING_LEVELS >= 3
- sh2_unmap_domain_page(l2e);
- }
-#if CONFIG_PAGING_LEVELS >= 4
- sh2_unmap_domain_page(l3e);
- }
-#endif
-#endif
-
-#if CONFIG_PAGING_LEVELS == 4
- sh2_unmap_domain_page(l4e);
-#elif CONFIG_PAGING_LEVELS == 3
- sh2_unmap_domain_page(l3e);
-#else /* CONFIG_PAGING_LEVELS == 2 */
- sh2_unmap_domain_page(l2e);
-#endif
-
- }
-
- //SHADOW2_PRINTK("p2m audit complete\n");
- //if ( orphans_i | orphans_d | mpbad | pmbad )
- // SHADOW2_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
- // orphans_i + orphans_d, orphans_i, orphans_d,
- if ( mpbad | pmbad )
- SHADOW2_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
- pmbad, mpbad);
-}
-
-#endif /* p2m audit */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
+++ /dev/null
-/******************************************************************************
- * arch/x86/shadow2.c
- *
- * Simple, mostly-synchronous shadow page tables.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
- * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
- * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-// DESIGN QUESTIONS:
-// Why use subshadows for PAE guests?
-// - reduces pressure in the hash table
-// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
-// - would need to find space in the page_info to store 7 more bits of
-// backpointer
-// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
-// figure out when to demote the guest page from l3 status
-//
-// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
-// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
-// space for both PV and HVM guests.
-//
-
-#define SHADOW2 1
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/trace.h>
-#include <xen/sched.h>
-#include <xen/perfc.h>
-#include <xen/domain_page.h>
-#include <asm/page.h>
-#include <asm/current.h>
-#include <asm/shadow2.h>
-#include <asm/shadow2-private.h>
-#include <asm/shadow2-types.h>
-#include <asm/flushtlb.h>
-#include <asm/hvm/hvm.h>
-
-/* The first cut: an absolutely synchronous, trap-and-emulate version,
- * supporting only HVM guests (and so only "external" shadow mode).
- *
- * THINGS TO DO LATER:
- *
- * FIX GVA_TO_GPA
- * The current interface returns an unsigned long, which is not big enough
- * to hold a physical address in PAE. Should return a gfn instead.
- *
- * TEARDOWN HEURISTICS
- * Also: have a heuristic for when to destroy a previous paging-mode's
- * shadows. When a guest is done with its start-of-day 32-bit tables
- * and reuses the memory we want to drop those shadows. Start with
- * shadows in a page in two modes as a hint, but beware of clever tricks
- * like reusing a pagetable for both PAE and 64-bit during boot...
- *
- * PAE LINEAR MAPS
- * Rework shadow_get_l*e() to have the option of using map_domain_page()
- * instead of linear maps. Add appropriate unmap_l*e calls in the users.
- * Then we can test the speed difference made by linear maps. If the
- * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
- * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
- * to share l2h pages again.
- *
- * PAE L3 COPYING
- * In this code, we copy all 32 bytes of a PAE L3 every time we change an
- * entry in it, and every time we change CR3. We copy it for the linear
- * mappings (ugh! PAE linear mappings) and we copy it to the low-memory
- * buffer so it fits in CR3. Maybe we can avoid some of this recopying
- * by using the shadow directly in some places.
- * Also, for SMP, need to actually respond to seeing shadow2.pae_flip_pending.
- *
- * GUEST_WALK_TABLES TLB FLUSH COALESCE
- * guest_walk_tables can do up to three remote TLB flushes as it walks to
- * the first l1 of a new pagetable. Should coalesce the flushes to the end,
- * and if we do flush, re-do the walk. If anything has changed, then
- * pause all the other vcpus and do the walk *again*.
- *
- * WP DISABLED
- * Consider how to implement having the WP bit of CR0 set to 0.
- * Since we need to be able to cause write faults to pagetables, this might
- * end up looking like not having the (guest) pagetables present at all in
- * HVM guests...
- *
- * PSE disabled / PSE36
- * We don't support any modes other than PSE enabled, PSE36 disabled.
- * Neither of those would be hard to change, but we'd need to be able to
- * deal with shadows made in one mode and used in another.
- */
-
-#define FETCH_TYPE_PREFETCH 1
-#define FETCH_TYPE_DEMAND 2
-#define FETCH_TYPE_WRITE 4
-typedef enum {
- ft_prefetch = FETCH_TYPE_PREFETCH,
- ft_demand_read = FETCH_TYPE_DEMAND,
- ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
-} fetch_type_t;
-
-#ifdef DEBUG_TRACE_DUMP
-static char *fetch_type_names[] = {
- [ft_prefetch] "prefetch",
- [ft_demand_read] "demand read",
- [ft_demand_write] "demand write",
-};
-#endif
-
-/* XXX forward declarations */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
-#endif
-static inline void sh2_update_linear_entries(struct vcpu *v);
-
-/**************************************************************************/
-/* Hash table mapping from guest pagetables to shadows
- *
- * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
- * FL1's: maps the *gfn* of the start of a superpage to the mfn of a
- * shadow L1 which maps its "splinters".
- * PAE CR3s: maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
- * PAE L3 info page for that CR3 value.
- */
-
-static inline mfn_t
-get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
-/* Look for FL1 shadows in the hash table */
-{
- mfn_t smfn = shadow2_hash_lookup(v, gfn_x(gfn),
- PGC_SH2_fl1_shadow >> PGC_SH2_type_shift);
-
- if ( unlikely(shadow2_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
- {
- struct page_info *page = mfn_to_page(smfn);
- if ( !(page->count_info & PGC_SH2_log_dirty) )
- shadow2_convert_to_log_dirty(v, smfn);
- }
-
- return smfn;
-}
-
-static inline mfn_t
-get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
-/* Look for shadows in the hash table */
-{
- mfn_t smfn = shadow2_hash_lookup(v, mfn_x(gmfn),
- shadow_type >> PGC_SH2_type_shift);
- perfc_incrc(shadow2_get_shadow_status);
-
- if ( unlikely(shadow2_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
- {
- struct page_info *page = mfn_to_page(smfn);
- if ( !(page->count_info & PGC_SH2_log_dirty) )
- shadow2_convert_to_log_dirty(v, smfn);
- }
-
- return smfn;
-}
-
-static inline void
-set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
-/* Put an FL1 shadow into the hash table */
-{
- SHADOW2_PRINTK("gfn=%"SH2_PRI_gfn", type=%08x, smfn=%05lx\n",
- gfn_x(gfn), PGC_SH2_fl1_shadow, mfn_x(smfn));
-
- if ( unlikely(shadow2_mode_log_dirty(v->domain)) )
- // mark this shadow as a log dirty shadow...
- set_bit(_PGC_SH2_log_dirty, &mfn_to_page(smfn)->count_info);
- else
- clear_bit(_PGC_SH2_log_dirty, &mfn_to_page(smfn)->count_info);
-
- shadow2_hash_insert(v, gfn_x(gfn),
- PGC_SH2_fl1_shadow >> PGC_SH2_type_shift, smfn);
-}
-
-static inline void
-set_shadow2_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
-/* Put a shadow into the hash table */
-{
- struct domain *d = v->domain;
- int res;
-
- SHADOW2_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
- d->domain_id, v->vcpu_id, mfn_x(gmfn),
- shadow_type, mfn_x(smfn));
-
- if ( unlikely(shadow2_mode_log_dirty(d)) )
- // mark this shadow as a log dirty shadow...
- set_bit(_PGC_SH2_log_dirty, &mfn_to_page(smfn)->count_info);
- else
- clear_bit(_PGC_SH2_log_dirty, &mfn_to_page(smfn)->count_info);
-
- res = get_page(mfn_to_page(gmfn), d);
- ASSERT(res == 1);
-
- shadow2_hash_insert(v, mfn_x(gmfn), shadow_type >> PGC_SH2_type_shift,
- smfn);
-}
-
-static inline void
-delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
-/* Remove a shadow from the hash table */
-{
- SHADOW2_PRINTK("gfn=%"SH2_PRI_gfn", type=%08x, smfn=%05lx\n",
- gfn_x(gfn), PGC_SH2_fl1_shadow, mfn_x(smfn));
-
- shadow2_hash_delete(v, gfn_x(gfn),
- PGC_SH2_fl1_shadow >> PGC_SH2_type_shift, smfn);
-}
-
-static inline void
-delete_shadow2_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
-/* Remove a shadow from the hash table */
-{
- SHADOW2_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
- v->domain->domain_id, v->vcpu_id,
- mfn_x(gmfn), shadow_type, mfn_x(smfn));
- shadow2_hash_delete(v, mfn_x(gmfn),
- shadow_type >> PGC_SH2_type_shift, smfn);
- put_page(mfn_to_page(gmfn));
-}
-
-/**************************************************************************/
-/* CPU feature support querying */
-
-static inline int
-guest_supports_superpages(struct vcpu *v)
-{
- /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
- * CR4.PSE is set or the guest is in PAE or long mode */
- return (hvm_guest(v) && (GUEST_PAGING_LEVELS != 2
- || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE)));
-}
-
-static inline int
-guest_supports_nx(struct vcpu *v)
-{
- if ( !hvm_guest(v) )
- return cpu_has_nx;
-
- // XXX - fix this!
- return 1;
-}
-
-
-/**************************************************************************/
-/* Functions for walking the guest page tables */
-
-
-/* Walk the guest pagetables, filling the walk_t with what we see.
- * Takes an uninitialised walk_t. The caller must call unmap_walk()
- * on the walk_t before discarding it or calling guest_walk_tables again.
- * If "guest_op" is non-zero, we are serving a genuine guest memory access,
- * and must (a) be under the shadow2 lock, and (b) remove write access
- * from any gueat PT pages we see, as we will be using their contents to
- * perform shadow updates.
- * Returns 0 for success or non-zero if the guest pagetables are malformed.
- * N.B. Finding a not-present entry does not cause a non-zero return code. */
-static inline int
-guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
-{
- ASSERT(!guest_op || shadow2_lock_is_acquired(v->domain));
-
- perfc_incrc(shadow2_guest_walk);
- memset(gw, 0, sizeof(*gw));
- gw->va = va;
-
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- /* Get l4e from the top level table */
- gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
- gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va);
- /* Walk down to the l3e */
- if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
- gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
- if ( !valid_mfn(gw->l3mfn) ) return 1;
- /* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op && shadow2_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
- flush_tlb_mask(v->domain->domain_dirty_cpumask);
- gw->l3e = ((guest_l3e_t *)sh2_map_domain_page(gw->l3mfn))
- + guest_l3_table_offset(va);
-#else /* PAE only... */
- /* Get l3e from the top level table */
- gw->l3mfn = pagetable_get_mfn(v->arch.guest_table);
- gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va);
-#endif /* PAE or 64... */
- /* Walk down to the l2e */
- if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
- gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
- if ( !valid_mfn(gw->l2mfn) ) return 1;
- /* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op && shadow2_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
- flush_tlb_mask(v->domain->domain_dirty_cpumask);
- gw->l2e = ((guest_l2e_t *)sh2_map_domain_page(gw->l2mfn))
- + guest_l2_table_offset(va);
-#else /* 32-bit only... */
- /* Get l2e from the top level table */
- gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
- gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va);
-#endif /* All levels... */
-
- if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
- if ( guest_supports_superpages(v) &&
- (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) )
- {
- /* Special case: this guest VA is in a PSE superpage, so there's
- * no guest l1e. We make one up so that the propagation code
- * can generate a shadow l1 table. Start with the gfn of the
- * first 4k-page of the superpage. */
- gfn_t start = guest_l2e_get_gfn(*gw->l2e);
- /* Grant full access in the l1e, since all the guest entry's
- * access controls are enforced in the shadow l2e. This lets
- * us reflect l2 changes later without touching the l1s. */
- int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
- _PAGE_ACCESSED|_PAGE_DIRTY);
- /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
- * of the level 1 */
- if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
- flags |= _PAGE_PAT;
- /* Increment the pfn by the right number of 4k pages.
- * The ~0x1 is to mask out the PAT bit mentioned above. */
- start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
- gw->eff_l1e = guest_l1e_from_gfn(start, flags);
- gw->l1e = NULL;
- gw->l1mfn = _mfn(INVALID_MFN);
- }
- else
- {
- /* Not a superpage: carry on and find the l1e. */
- gw->l1mfn = vcpu_gfn_to_mfn(v, guest_l2e_get_gfn(*gw->l2e));
- if ( !valid_mfn(gw->l1mfn) ) return 1;
- /* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op
- && shadow2_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
- flush_tlb_mask(v->domain->domain_dirty_cpumask);
- gw->l1e = ((guest_l1e_t *)sh2_map_domain_page(gw->l1mfn))
- + guest_l1_table_offset(va);
- gw->eff_l1e = *gw->l1e;
- }
-
- return 0;
-}
-
-/* Given a walk_t, translate the gw->va into the guest's notion of the
- * corresponding frame number. */
-static inline gfn_t
-guest_walk_to_gfn(walk_t *gw)
-{
- if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
- return _gfn(INVALID_GFN);
- return guest_l1e_get_gfn(gw->eff_l1e);
-}
-
-/* Given a walk_t, translate the gw->va into the guest's notion of the
- * corresponding physical address. */
-static inline paddr_t
-guest_walk_to_gpa(walk_t *gw)
-{
- if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
- return 0;
- return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
-}
-
-
-/* Unmap (and reinitialise) a guest walk.
- * Call this to dispose of any walk filled in by guest_walk_tables() */
-static void unmap_walk(struct vcpu *v, walk_t *gw)
-{
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
- if ( gw->l3e != NULL ) sh2_unmap_domain_page(gw->l3e);
-#endif
- if ( gw->l2e != NULL ) sh2_unmap_domain_page(gw->l2e);
-#endif
- if ( gw->l1e != NULL ) sh2_unmap_domain_page(gw->l1e);
-#ifdef DEBUG
- memset(gw, 0, sizeof(*gw));
-#endif
-}
-
-
-/* Pretty-print the contents of a guest-walk */
-static inline void print_gw(walk_t *gw)
-{
- SHADOW2_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- SHADOW2_PRINTK(" l4mfn=%" SH2_PRI_mfn "\n", mfn_x(gw->l4mfn));
- SHADOW2_PRINTK(" l4e=%p\n", gw->l4e);
- if ( gw->l4e )
- SHADOW2_PRINTK(" *l4e=%" SH2_PRI_gpte "\n", gw->l4e->l4);
-#endif /* PAE or 64... */
- SHADOW2_PRINTK(" l3mfn=%" SH2_PRI_mfn "\n", mfn_x(gw->l3mfn));
- SHADOW2_PRINTK(" l3e=%p\n", gw->l3e);
- if ( gw->l3e )
- SHADOW2_PRINTK(" *l3e=%" SH2_PRI_gpte "\n", gw->l3e->l3);
-#endif /* All levels... */
- SHADOW2_PRINTK(" l2mfn=%" SH2_PRI_mfn "\n", mfn_x(gw->l2mfn));
- SHADOW2_PRINTK(" l2e=%p\n", gw->l2e);
- if ( gw->l2e )
- SHADOW2_PRINTK(" *l2e=%" SH2_PRI_gpte "\n", gw->l2e->l2);
- SHADOW2_PRINTK(" l1mfn=%" SH2_PRI_mfn "\n", mfn_x(gw->l1mfn));
- SHADOW2_PRINTK(" l1e=%p\n", gw->l1e);
- if ( gw->l1e )
- SHADOW2_PRINTK(" *l1e=%" SH2_PRI_gpte "\n", gw->l1e->l1);
- SHADOW2_PRINTK(" eff_l1e=%" SH2_PRI_gpte "\n", gw->eff_l1e.l1);
-}
-
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES
-/* Lightweight audit: pass all the shadows associated with this guest walk
- * through the audit mechanisms */
-static void sh2_audit_gw(struct vcpu *v, walk_t *gw)
-{
- mfn_t smfn;
-
- if ( !(SHADOW2_AUDIT_ENABLE) )
- return;
-
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- if ( valid_mfn(gw->l4mfn)
- && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn,
- PGC_SH2_l4_shadow))) )
- (void) sh2_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* PAE or 64... */
- if ( valid_mfn(gw->l3mfn)
- && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn,
- PGC_SH2_l3_shadow))) )
- (void) sh2_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* All levels... */
- if ( valid_mfn(gw->l2mfn) )
- {
- if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
- PGC_SH2_l2_shadow))) )
- (void) sh2_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
-#if GUEST_PAGING_LEVELS == 3
- if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
- PGC_SH2_l2h_shadow))) )
- (void) sh2_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
-#endif
- }
- if ( valid_mfn(gw->l1mfn)
- && valid_mfn((smfn = get_shadow_status(v, gw->l1mfn,
- PGC_SH2_l1_shadow))) )
- (void) sh2_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
- else if ( gw->l2e
- && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
- && valid_mfn(
- (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
- (void) sh2_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
-}
-
-#else
-#define sh2_audit_gw(_v, _gw) do {} while(0)
-#endif /* audit code */
-
-
-
-/**************************************************************************/
-/* Function to write to the guest tables, for propagating accessed and
- * dirty bits from the shadow to the guest.
- * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
- * and an operation type. The guest entry is always passed as an l1e:
- * since we only ever write flags, that's OK.
- * Returns the new flag bits of the guest entry. */
-
-static u32 guest_set_ad_bits(struct vcpu *v,
- mfn_t gmfn,
- guest_l1e_t *ep,
- unsigned int level,
- fetch_type_t ft)
-{
- u32 flags, shflags, bit;
- struct page_info *pg;
- int res = 0;
-
- ASSERT(valid_mfn(gmfn)
- && (sh2_mfn_is_a_page_table(gmfn)
- || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
- == 0)));
- ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
- ASSERT(level <= GUEST_PAGING_LEVELS);
- ASSERT(ft == ft_demand_read || ft == ft_demand_write);
- ASSERT(shadow2_lock_is_acquired(v->domain));
-
- flags = guest_l1e_get_flags(*ep);
-
- /* PAE l3s do not have A and D bits */
- if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
- return flags;
-
- /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */
- if ( ft == ft_demand_write
- && (level == 1 ||
- (level == 2 && GUEST_PAGING_LEVELS < 4
- && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
- {
- if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
- == (_PAGE_DIRTY | _PAGE_ACCESSED) )
- return flags; /* Guest already has A and D bits set */
- flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
- perfc_incrc(shadow2_ad_update);
- }
- else
- {
- if ( flags & _PAGE_ACCESSED )
- return flags; /* Guest already has A bit set */
- flags |= _PAGE_ACCESSED;
- perfc_incrc(shadow2_a_update);
- }
-
- /* Set the bit(s) */
- sh2_mark_dirty(v->domain, gmfn);
- SHADOW2_DEBUG(A_AND_D, "gfn = %"SH2_PRI_gfn", "
- "old flags = %#x, new flags = %#x\n",
- guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags);
- *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
-
- /* May need to propagate this change forward to other kinds of shadow */
- pg = mfn_to_page(gmfn);
- if ( !sh2_mfn_is_a_page_table(gmfn) )
- {
- /* This guest pagetable is not yet shadowed at all. */
- // MAF: I think this assert is busted... If this gmfn has not yet
- // been promoted, then it seems perfectly reasonable for there to be
- // outstanding type refs to it...
- /* TJD: No. If the gmfn has not been promoted, we must at least
- * have recognised that it is a pagetable, and pulled write access.
- * The type count should only be non-zero if it is actually a page
- * table. The test above was incorrect, though, so I've fixed it. */
- ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0);
- return flags;
- }
-
- shflags = pg->shadow2_flags & SH2F_page_type_mask;
- while ( shflags )
- {
- bit = find_first_set_bit(shflags);
- ASSERT(shflags & (1u << bit));
- shflags &= ~(1u << bit);
- if ( !(pg->shadow2_flags & (1u << bit)) )
- continue;
- switch ( bit )
- {
- case PGC_SH2_type_to_index(PGC_SH2_l1_shadow):
- if (level != 1)
- res |= sh2_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep));
- break;
- case PGC_SH2_type_to_index(PGC_SH2_l2_shadow):
- if (level != 2)
- res |= sh2_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep));
- break;
-#if GUEST_PAGING_LEVELS == 3 /* PAE only */
- case PGC_SH2_type_to_index(PGC_SH2_l2h_shadow):
- if (level != 2)
- res |= sh2_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep));
- break;
-#endif
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
- case PGC_SH2_type_to_index(PGC_SH2_l3_shadow):
- if (level != 3)
- res |= sh2_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep));
- break;
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- case PGC_SH2_type_to_index(PGC_SH2_l4_shadow):
- if (level != 4)
- res |= sh2_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep));
- break;
-#endif
-#endif
- default:
- SHADOW2_ERROR("mfn %"SH2_PRI_mfn" is shadowed in multiple "
- "modes: A&D bits may be out of sync (flags=%#x).\n",
- mfn_x(gmfn), pg->shadow2_flags);
- /* XXX Shadows in other modes will not be updated, so will
- * have their A and D bits out of sync. */
- }
- }
-
- /* We should never need to flush the TLB or recopy PAE entries */
- ASSERT( res == 0 || res == SHADOW2_SET_CHANGED );
- return flags;
-}
-
-/**************************************************************************/
-/* Functions to compute the correct index into a shadow page, given an
- * index into the guest page (as returned by guest_get_index()).
- * This is trivial when the shadow and guest use the same sized PTEs, but
- * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
- * PAE- or 64-bit shadows).
- *
- * These functions also increment the shadow mfn, when necessary. When PTE
- * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
- * page. In this case, we allocate 2 contiguous pages for the shadow L1, and
- * use simple pointer arithmetic on a pointer to the guest L1e to figure out
- * which shadow page we really want. Similarly, when PTE sizes are
- * mismatched, we shadow a guest L2 page with 4 shadow L2 pages. (The easiest
- * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
- * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
- * space.)
- *
- * For PAE guests, for every 32-bytes of guest L3 page table, we use 64-bytes
- * of shadow (to store both the shadow, and the info that would normally be
- * stored in page_info fields). This arrangement allows the shadow and the
- * "page_info" fields to always be stored in the same page (in fact, in
- * the same cache line), avoiding an extra call to map_domain_page().
- */
-
-static inline u32
-guest_index(void *ptr)
-{
- return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
-}
-
-static inline u32
-shadow_l1_index(mfn_t *smfn, u32 guest_index)
-{
-#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
- *smfn = _mfn(mfn_x(*smfn) +
- (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
- return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
-#else
- return guest_index;
-#endif
-}
-
-static inline u32
-shadow_l2_index(mfn_t *smfn, u32 guest_index)
-{
-#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
- // Because we use 2 shadow l2 entries for each guest entry, the number of
- // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
- //
- *smfn = _mfn(mfn_x(*smfn) +
- (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
-
- // We multiple by two to get the index of the first of the two entries
- // used to shadow the specified guest entry.
- return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
-#else
- return guest_index;
-#endif
-}
-
-#if GUEST_PAGING_LEVELS >= 3
-
-static inline u32
-shadow_l3_index(mfn_t *smfn, u32 guest_index)
-{
-#if GUEST_PAGING_LEVELS == 3
- u32 group_id;
-
- // Because we use twice the space in L3 shadows as was consumed in guest
- // L3s, the number of guest entries per shadow page is
- // SHADOW_L2_PAGETABLE_ENTRIES/2. (Note this is *not*
- // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
- //
- *smfn = _mfn(mfn_x(*smfn) +
- (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
-
- // We store PAE L3 shadows in groups of 4, alternating shadows and
- // pae_l3_bookkeeping structs. So the effective shadow index is
- // the the group_id * 8 + the offset within the group.
- //
- guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
- group_id = guest_index / 4;
- return (group_id * 8) + (guest_index % 4);
-#else
- return guest_index;
-#endif
-}
-
-#endif // GUEST_PAGING_LEVELS >= 3
-
-#if GUEST_PAGING_LEVELS >= 4
-
-static inline u32
-shadow_l4_index(mfn_t *smfn, u32 guest_index)
-{
- return guest_index;
-}
-
-#endif // GUEST_PAGING_LEVELS >= 4
-
-
-/**************************************************************************/
-/* Functions which compute shadow entries from their corresponding guest
- * entries.
- *
- * These are the "heart" of the shadow code.
- *
- * There are two sets of these: those that are called on demand faults (read
- * faults and write faults), and those that are essentially called to
- * "prefetch" (or propagate) entries from the guest into the shadow. The read
- * fault and write fault are handled as two separate cases for L1 entries (due
- * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
- * into the respective demand_fault functions.
- */
-
-#define CHECK(_cond) \
-do { \
- if (unlikely(!(_cond))) \
- { \
- printk("%s %s %d ASSERTION (%s) FAILED\n", \
- __func__, __FILE__, __LINE__, #_cond); \
- return -1; \
- } \
-} while (0);
-
-// The function below tries to capture all of the flag manipulation for the
-// demand and propagate functions into one place.
-//
-static always_inline u32
-sh2_propagate_flags(struct vcpu *v, mfn_t target_mfn,
- u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn,
- int mmio, int level, fetch_type_t ft)
-{
- struct domain *d = v->domain;
- u32 pass_thru_flags;
- u32 sflags;
-
- // XXX -- might want to think about PAT support for HVM guests...
-
-#ifndef NDEBUG
- // MMIO can only occur from L1e's
- //
- if ( mmio )
- CHECK(level == 1);
-
- // We should always have a pointer to the guest entry if it's a non-PSE
- // non-MMIO demand access.
- if ( ft & FETCH_TYPE_DEMAND )
- CHECK(guest_entry_ptr || level == 1);
-#endif
-
- // A not-present guest entry has a special signature in the shadow table,
- // so that we do not have to consult the guest tables multiple times...
- //
- if ( unlikely(!(gflags & _PAGE_PRESENT)) )
- return _PAGE_SHADOW_GUEST_NOT_PRESENT;
-
- // Must have a valid target_mfn, unless this is mmio, or unless this is a
- // prefetch. In the case of a prefetch, an invalid mfn means that we can
- // not usefully shadow anything, and so we return early.
- //
- if ( !valid_mfn(target_mfn) )
- {
- CHECK((ft == ft_prefetch) || mmio);
- if ( !mmio )
- return 0;
- }
-
- // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
- //
- if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
- pass_thru_flags = _PAGE_PRESENT;
- else
- {
- pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
- _PAGE_RW | _PAGE_PRESENT);
- if ( guest_supports_nx(v) )
- pass_thru_flags |= _PAGE_NX_BIT;
- }
-
- // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
- // L3e's; they are all implied. So we emulate them here.
- //
- if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
- gflags = pass_thru_flags;
-
- // Propagate bits from the guest to the shadow.
- // Some of these may be overwritten, below.
- // Since we know the guest's PRESENT bit is set, we also set the shadow's
- // SHADOW_PRESENT bit.
- //
- sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
-
- // Copy the guest's RW bit into the SHADOW_RW bit.
- //
- if ( gflags & _PAGE_RW )
- sflags |= _PAGE_SHADOW_RW;
-
- // Set the A&D bits for higher level shadows.
- // Higher level entries do not, strictly speaking, have dirty bits, but
- // since we use shadow linear tables, each of these entries may, at some
- // point in time, also serve as a shadow L1 entry.
- // By setting both the A&D bits in each of these, we eliminate the burden
- // on the hardware to update these bits on initial accesses.
- //
- if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
- sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
-
-
- // Set the A and D bits in the guest entry, if we need to.
- if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
- gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
-
- // If the A or D bit has not yet been set in the guest, then we must
- // prevent the corresponding kind of access.
- //
- if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
- !(gflags & _PAGE_ACCESSED)) )
- sflags &= ~_PAGE_PRESENT;
-
- /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */
- if ( unlikely( ((level == 1)
- || ((level == 2) && (GUEST_PAGING_LEVELS < 4)
- && guest_supports_superpages(v) &&
- (gflags & _PAGE_PSE)))
- && !(gflags & _PAGE_DIRTY)) )
- sflags &= ~_PAGE_RW;
-
- // MMIO caching
- //
- // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit
- // to cache the fact that this entry is in MMIO space.
- //
- if ( (level == 1) && mmio )
- {
- sflags &= ~(_PAGE_PRESENT);
- sflags |= _PAGE_SHADOW_MMIO;
- }
- else
- {
- // shadow2_mode_log_dirty support
- //
- // Only allow the guest write access to a page a) on a demand fault,
- // or b) if the page is already marked as dirty.
- //
- if ( unlikely((level == 1) &&
- !(ft & FETCH_TYPE_WRITE) &&
- shadow2_mode_log_dirty(d) &&
- !sh2_mfn_is_dirty(d, target_mfn)) )
- {
- sflags &= ~_PAGE_RW;
- }
-
- // protect guest page tables
- //
- if ( unlikely((level == 1) &&
- sh2_mfn_is_a_page_table(target_mfn)) )
- {
- if ( shadow2_mode_trap_reads(d) )
- {
- // if we are trapping both reads & writes, then mark this page
- // as not present...
- //
- sflags &= ~_PAGE_PRESENT;
- }
- else
- {
- // otherwise, just prevent any writes...
- //
- sflags &= ~_PAGE_RW;
- }
- }
- }
-
- return sflags;
-}
-
-#undef CHECK
-
-#if GUEST_PAGING_LEVELS >= 4
-static void
-l4e_propagate_from_guest(struct vcpu *v,
- guest_l4e_t *gl4e,
- mfn_t gl4mfn,
- mfn_t sl3mfn,
- shadow_l4e_t *sl4p,
- fetch_type_t ft)
-{
- u32 gflags = guest_l4e_get_flags(*gl4e);
- u32 sflags = sh2_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e,
- gl4mfn, 0, 4, ft);
-
- *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags);
-
- SHADOW2_DEBUG(PROPAGATE,
- "%s gl4e=%" SH2_PRI_gpte " sl4e=%" SH2_PRI_pte "\n",
- fetch_type_names[ft], gl4e->l4, sl4p->l4);
- ASSERT(sflags != -1);
-}
-#endif // GUEST_PAGING_LEVELS >= 4
-
-#if GUEST_PAGING_LEVELS >= 3
-static void
-l3e_propagate_from_guest(struct vcpu *v,
- guest_l3e_t *gl3e,
- mfn_t gl3mfn,
- mfn_t sl2mfn,
- shadow_l3e_t *sl3p,
- fetch_type_t ft)
-{
- u32 gflags = guest_l3e_get_flags(*gl3e);
- u32 sflags = sh2_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e,
- gl3mfn, 0, 3, ft);
-
- *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags);
-
- SHADOW2_DEBUG(PROPAGATE,
- "%s gl3e=%" SH2_PRI_gpte " sl3e=%" SH2_PRI_pte "\n",
- fetch_type_names[ft], gl3e->l3, sl3p->l3);
- ASSERT(sflags != -1);
-}
-#endif // GUEST_PAGING_LEVELS >= 3
-
-static void
-l2e_propagate_from_guest(struct vcpu *v,
- guest_l2e_t *gl2e,
- mfn_t gl2mfn,
- mfn_t sl1mfn,
- shadow_l2e_t *sl2p,
- fetch_type_t ft)
-{
- u32 gflags = guest_l2e_get_flags(*gl2e);
- u32 sflags = sh2_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e,
- gl2mfn, 0, 2, ft);
-
- *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags);
-
- SHADOW2_DEBUG(PROPAGATE,
- "%s gl2e=%" SH2_PRI_gpte " sl2e=%" SH2_PRI_pte "\n",
- fetch_type_names[ft], gl2e->l2, sl2p->l2);
- ASSERT(sflags != -1);
-}
-
-static inline int
-l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
- int mmio)
-/* returns 1 if emulation is required, and 0 otherwise */
-{
- struct domain *d = v->domain;
- u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
- u32 sflags = sh2_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
- mmio, 1, ft_demand_read);
-
- if ( shadow2_mode_trap_reads(d) && !mmio && sh2_mfn_is_a_page_table(gmfn) )
- {
- // emulation required!
- *sl1p = shadow_l1e_empty();
- return 1;
- }
-
- *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
-
- SHADOW2_DEBUG(PROPAGATE,
- "va=%p eff_gl1e=%" SH2_PRI_gpte " sl1e=%" SH2_PRI_pte "\n",
- (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
-
- ASSERT(sflags != -1);
- return 0;
-}
-
-static inline int
-l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
- int mmio)
-/* returns 1 if emulation is required, and 0 otherwise */
-{
- struct domain *d = v->domain;
- u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
- u32 sflags = sh2_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
- mmio, 1, ft_demand_write);
-
- sh2_mark_dirty(d, gmfn);
-
- if ( !mmio && sh2_mfn_is_a_page_table(gmfn) )
- {
- // emulation required!
- *sl1p = shadow_l1e_empty();
- return 1;
- }
-
- *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
-
- SHADOW2_DEBUG(PROPAGATE,
- "va=%p eff_gl1e=%" SH2_PRI_gpte " sl1e=%" SH2_PRI_pte "\n",
- (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
-
- ASSERT(sflags != -1);
- return 0;
-}
-
-static inline void
-l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p,
- int mmio)
-{
- gfn_t gfn = guest_l1e_get_gfn(gl1e);
- mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn);
- u32 gflags = guest_l1e_get_flags(gl1e);
- u32 sflags = sh2_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN),
- mmio, 1, ft_prefetch);
-
- *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
-
- SHADOW2_DEBUG(PROPAGATE,
- "gl1e=%" SH2_PRI_gpte " sl1e=%" SH2_PRI_pte "\n",
- gl1e.l1, sl1p->l1);
-
- ASSERT(sflags != -1);
-}
-
-
-/**************************************************************************/
-/* These functions update shadow entries (and do bookkeeping on the shadow
- * tables they are in). It is intended that they are the only
- * functions which ever write (non-zero) data onto a shadow page.
- *
- * They return a set of flags:
- * SHADOW2_SET_CHANGED -- we actually wrote a new value to the shadow.
- * SHADOW2_SET_FLUSH -- the caller must cause a TLB flush.
- * SHADOW2_SET_ERROR -- the input is not a valid entry (for example, if
- * shadow2_get_page_from_l1e() fails).
- * SHADOW2_SET_L3PAE_RECOPY -- one or more vcpu's need to have their local
- * copies of their PAE L3 entries re-copied.
- */
-
-static inline void safe_write_entry(void *dst, void *src)
-/* Copy one PTE safely when processors might be running on the
- * destination pagetable. This does *not* give safety against
- * concurrent writes (that's what the shadow lock is for), just
- * stops the hardware picking up partially written entries. */
-{
- volatile unsigned long *d = dst;
- unsigned long *s = src;
- ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
-#if CONFIG_PAGING_LEVELS == 3
- /* In PAE mode, pagetable entries are larger
- * than machine words, so won't get written atomically. We need to make
- * sure any other cpu running on these shadows doesn't see a
- * half-written entry. Do this by marking the entry not-present first,
- * then writing the high word before the low word. */
- BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
- d[0] = 0;
- d[1] = s[1];
- d[0] = s[0];
-#else
- /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
- * which will be an atomic write, since the entry is aligned. */
- BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
- *d = *s;
-#endif
-}
-
-
-static inline void
-shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
-/* This function does the actual writes to shadow pages.
- * It must not be called directly, since it doesn't do the bookkeeping
- * that shadow_set_l*e() functions do. */
-{
- shadow_l1e_t *dst = d;
- shadow_l1e_t *src = s;
- void *map = NULL;
- int i;
-
- /* Because we mirror access rights at all levels in the shadow, an
- * l2 (or higher) entry with the RW bit cleared will leave us with
- * no write access through the linear map.
- * We detect that by writing to the shadow with copy_to_user() and
- * using map_domain_page() to get a writeable mapping if we need to. */
- if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 )
- {
- perfc_incrc(shadow2_linear_map_failed);
- map = sh2_map_domain_page(mfn);
- ASSERT(map != NULL);
- dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
- }
-
-
- for ( i = 0; i < entries; i++ )
- safe_write_entry(dst++, src++);
-
- if ( map != NULL ) sh2_unmap_domain_page(map);
-
- /* XXX TODO:
- * Update min/max field in page_info struct of this mfn */
-}
-
-static inline int
-perms_strictly_increased(u32 old_flags, u32 new_flags)
-/* Given the flags of two entries, are the new flags a strict
- * increase in rights over the old ones? */
-{
- u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
- u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
- /* Flip the NX bit, since it's the only one that decreases rights;
- * we calculate as if it were an "X" bit. */
- of ^= _PAGE_NX_BIT;
- nf ^= _PAGE_NX_BIT;
- /* If the changed bits are all set in the new flags, then rights strictly
- * increased between old and new. */
- return ((of | (of ^ nf)) == nf);
-}
-
-static int inline
-shadow2_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
-{
- int res;
- mfn_t mfn;
- struct domain *owner;
- shadow_l1e_t sanitized_sl1e =
- shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT);
-
- //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT);
- //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0);
-
- if ( !shadow2_mode_refcounts(d) )
- return 1;
-
- res = get_page_from_l1e(sanitized_sl1e, d);
-
- // If a privileged domain is attempting to install a map of a page it does
- // not own, we let it succeed anyway.
- //
- if ( unlikely(!res) &&
- IS_PRIV(d) &&
- !shadow2_mode_translate(d) &&
- valid_mfn(mfn = shadow_l1e_get_mfn(sl1e)) &&
- (owner = page_get_owner(mfn_to_page(mfn))) &&
- (d != owner) )
- {
- res = get_page_from_l1e(sanitized_sl1e, owner);
- SHADOW2_PRINTK("privileged domain %d installs map of mfn %05lx "
- "which is owned by domain %d: %s\n",
- d->domain_id, mfn_x(mfn), owner->domain_id,
- res ? "success" : "failed");
- }
-
- if ( unlikely(!res) )
- {
- perfc_incrc(shadow2_get_page_fail);
- SHADOW2_PRINTK("failed: l1e=" SH2_PRI_pte "\n");
- }
-
- return res;
-}
-
-static void inline
-shadow2_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
-{
- if ( !shadow2_mode_refcounts(d) )
- return;
-
- put_page_from_l1e(sl1e, d);
-}
-
-#if GUEST_PAGING_LEVELS >= 4
-static int shadow_set_l4e(struct vcpu *v,
- shadow_l4e_t *sl4e,
- shadow_l4e_t new_sl4e,
- mfn_t sl4mfn)
-{
- int flags = 0;
- shadow_l4e_t old_sl4e;
- paddr_t paddr;
- ASSERT(sl4e != NULL);
- old_sl4e = *sl4e;
-
- if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
-
- paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
- | (((unsigned long)sl4e) & ~PAGE_MASK));
-
- if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
- {
- /* About to install a new reference */
- sh2_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr);
- }
-
- /* Write the new entry */
- shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
- flags |= SHADOW2_SET_CHANGED;
-
- if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
- {
- /* We lost a reference to an old mfn. */
- mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
- if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
- || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
- shadow_l4e_get_flags(new_sl4e)) )
- {
- flags |= SHADOW2_SET_FLUSH;
- }
- sh2_put_ref(v, osl3mfn, paddr);
- }
- return flags;
-}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-#if GUEST_PAGING_LEVELS >= 3
-static int shadow_set_l3e(struct vcpu *v,
- shadow_l3e_t *sl3e,
- shadow_l3e_t new_sl3e,
- mfn_t sl3mfn)
-{
- int flags = 0;
- shadow_l3e_t old_sl3e;
- paddr_t paddr;
- ASSERT(sl3e != NULL);
- old_sl3e = *sl3e;
-
- if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
-
- paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
- | (((unsigned long)sl3e) & ~PAGE_MASK));
-
- if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
- {
- /* About to install a new reference */
- sh2_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr);
- }
-
- /* Write the new entry */
- shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
- flags |= SHADOW2_SET_CHANGED;
-
-#if GUEST_PAGING_LEVELS == 3
- /* We wrote a guest l3e in a PAE pagetable. This table is copied in
- * the linear pagetable entries of its l2s, and may also be copied
- * to a low memory location to make it fit in CR3. Report that we
- * need to resync those copies (we can't wait for the guest to flush
- * the TLB because it might be an increase in rights). */
- {
- struct vcpu *vcpu;
-
- struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
- for_each_vcpu(v->domain, vcpu)
- {
- if (info->vcpus & (1 << vcpu->vcpu_id))
- {
- // Remember that this flip/update needs to occur.
- vcpu->arch.shadow2.pae_flip_pending = 1;
- flags |= SHADOW2_SET_L3PAE_RECOPY;
- }
- }
- }
-#endif
-
- if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
- {
- /* We lost a reference to an old mfn. */
- mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
- if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
- !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
- shadow_l3e_get_flags(new_sl3e)) )
- {
- flags |= SHADOW2_SET_FLUSH;
- }
- sh2_put_ref(v, osl2mfn, paddr);
- }
- return flags;
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-static int shadow_set_l2e(struct vcpu *v,
- shadow_l2e_t *sl2e,
- shadow_l2e_t new_sl2e,
- mfn_t sl2mfn)
-{
- int flags = 0;
- shadow_l2e_t old_sl2e;
- paddr_t paddr;
-
-#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
- /* In 2-on-3 we work with pairs of l2es pointing at two-page
- * shadows. Reference counting and up-pointers track from the first
- * page of the shadow to the first l2e, so make sure that we're
- * working with those:
- * Align the pointer down so it's pointing at the first of the pair */
- sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
- /* Align the mfn of the shadow entry too */
- new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
-#endif
-
- ASSERT(sl2e != NULL);
- old_sl2e = *sl2e;
-
- if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
-
- paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
- | (((unsigned long)sl2e) & ~PAGE_MASK));
-
- if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
- {
- /* About to install a new reference */
- sh2_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr);
- }
-
- /* Write the new entry */
-#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
- {
- shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
- /* The l1 shadow is two pages long and need to be pointed to by
- * two adjacent l1es. The pair have the same flags, but point
- * at odd and even MFNs */
- ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
- pair[1].l2 |= (1<<PAGE_SHIFT);
- shadow_write_entries(sl2e, &pair, 2, sl2mfn);
- }
-#else /* normal case */
- shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
-#endif
- flags |= SHADOW2_SET_CHANGED;
-
- if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
- {
- /* We lost a reference to an old mfn. */
- mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
- if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
- !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
- shadow_l2e_get_flags(new_sl2e)) )
- {
- flags |= SHADOW2_SET_FLUSH;
- }
- sh2_put_ref(v, osl1mfn, paddr);
- }
- return flags;
-}
-
-static int shadow_set_l1e(struct vcpu *v,
- shadow_l1e_t *sl1e,
- shadow_l1e_t new_sl1e,
- mfn_t sl1mfn)
-{
- int flags = 0;
- struct domain *d = v->domain;
- shadow_l1e_t old_sl1e;
- ASSERT(sl1e != NULL);
-
- old_sl1e = *sl1e;
-
- if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
-
- if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT )
- {
- /* About to install a new reference */
- if ( shadow2_mode_refcounts(d) ) {
- if ( shadow2_get_page_from_l1e(new_sl1e, d) == 0 )
- {
- /* Doesn't look like a pagetable. */
- flags |= SHADOW2_SET_ERROR;
- new_sl1e = shadow_l1e_empty();
- }
- }
- }
-
- /* Write the new entry */
- shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
- flags |= SHADOW2_SET_CHANGED;
-
- if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
- {
- /* We lost a reference to an old mfn. */
- /* N.B. Unlike higher-level sets, never need an extra flush
- * when writing an l1e. Because it points to the same guest frame
- * as the guest l1e did, it's the guest's responsibility to
- * trigger a flush later. */
- if ( shadow2_mode_refcounts(d) )
- {
- shadow2_put_page_from_l1e(old_sl1e, d);
- }
- }
- return flags;
-}
-
-
-/**************************************************************************/
-/* These functions take a vcpu and a virtual address, and return a pointer
- * to the appropriate level N entry from the shadow tables.
- * If the necessary tables are not present in the shadow, they return NULL. */
-
-/* N.B. The use of GUEST_PAGING_LEVELS here is correct. If the shadow has
- * more levels than the guest, the upper levels are always fixed and do not
- * reflect any information from the guest, so we do not use these functions
- * to access them. */
-
-#if GUEST_PAGING_LEVELS >= 4
-static shadow_l4e_t *
-shadow_get_l4e(struct vcpu *v, unsigned long va)
-{
- /* Reading the top level table is always valid. */
- return sh2_linear_l4_table(v) + shadow_l4_linear_offset(va);
-}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-
-#if GUEST_PAGING_LEVELS >= 3
-static shadow_l3e_t *
-shadow_get_l3e(struct vcpu *v, unsigned long va)
-{
-#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
- /* Get the l4 */
- shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
- ASSERT(sl4e != NULL);
- if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
- return NULL;
- ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
- /* l4 was present; OK to get the l3 */
- return sh2_linear_l3_table(v) + shadow_l3_linear_offset(va);
-#else /* PAE... */
- /* Top level is always mapped */
- ASSERT(v->arch.shadow_vtable);
- return ((shadow_l3e_t *)v->arch.shadow_vtable) + shadow_l3_linear_offset(va);
-#endif
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-
-static shadow_l2e_t *
-shadow_get_l2e(struct vcpu *v, unsigned long va)
-{
-#if GUEST_PAGING_LEVELS >= 3 /* 64bit/PAE... */
- /* Get the l3 */
- shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
- if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
- return NULL;
- ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
- /* l3 was present; OK to get the l2 */
-#endif
- return sh2_linear_l2_table(v) + shadow_l2_linear_offset(va);
-}
-
-
-#if 0 // avoid the compiler warning for now...
-
-static shadow_l1e_t *
-shadow_get_l1e(struct vcpu *v, unsigned long va)
-{
- /* Get the l2 */
- shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
- if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
- return NULL;
- ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
- /* l2 was present; OK to get the l1 */
- return sh2_linear_l1_table(v) + shadow_l1_linear_offset(va);
-}
-
-#endif
-
-
-/**************************************************************************/
-/* Macros to walk pagetables. These take the shadow of a pagetable and
- * walk every "interesting" entry. That is, they don't touch Xen mappings,
- * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every
- * second entry (since pairs of entries are managed together). For multi-page
- * shadows they walk all pages.
- *
- * Arguments are an MFN, the variable to point to each entry, a variable
- * to indicate that we are done (we will shortcut to the end of the scan
- * when _done != 0), a variable to indicate that we should avoid Xen mappings,
- * and the code.
- *
- * WARNING: These macros have side-effects. They change the values of both
- * the pointer and the MFN. */
-
-static inline void increment_ptr_to_guest_entry(void *ptr)
-{
- if ( ptr )
- {
- guest_l1e_t **entry = ptr;
- (*entry)++;
- }
-}
-
-/* All kinds of l1: touch all entries */
-#define _SHADOW2_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
-do { \
- int _i; \
- shadow_l1e_t *_sp = map_shadow_page((_sl1mfn)); \
- ASSERT((mfn_to_page(_sl1mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l1_shadow \
- || (mfn_to_page(_sl1mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_fl1_shadow); \
- for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \
- { \
- (_sl1e) = _sp + _i; \
- if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- increment_ptr_to_guest_entry(_gl1p); \
- } \
- unmap_shadow_page(_sp); \
-} while (0)
-
-/* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
-#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
-#define SHADOW2_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
-do { \
- int __done = 0; \
- _SHADOW2_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
- ({ (__done = _done); }), _code); \
- _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1); \
- if ( !__done ) \
- _SHADOW2_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
- ({ (__done = _done); }), _code); \
-} while (0)
-#else /* Everything else; l1 shadows are only one page */
-#define SHADOW2_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
- _SHADOW2_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
-#endif
-
-
-#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
-
-/* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen */
-#define SHADOW2_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
-do { \
- int _i, _j, __done = 0; \
- ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l2_32_shadow); \
- for ( _j = 0; _j < 4 && !__done; _j++ ) \
- { \
- shadow_l2e_t *_sp = map_shadow_page(_sl2mfn); \
- for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 ) \
- if ( (!(_xen)) \
- || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i) \
- < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
- { \
- (_sl2e) = _sp + _i; \
- if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( (__done = (_done)) ) break; \
- increment_ptr_to_guest_entry(_gl2p); \
- } \
- unmap_shadow_page(_sp); \
- _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1); \
- } \
-} while (0)
-
-#elif GUEST_PAGING_LEVELS == 2
-
-/* 32-bit on 32-bit: avoid Xen entries */
-#define SHADOW2_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
-do { \
- int _i; \
- shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
- ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l2_32_shadow); \
- for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
- if ( (!(_xen)) \
- || \
- (_i < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
- { \
- (_sl2e) = _sp + _i; \
- if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- increment_ptr_to_guest_entry(_gl2p); \
- } \
- unmap_shadow_page(_sp); \
-} while (0)
-
-#elif GUEST_PAGING_LEVELS == 3
-
-/* PAE: if it's an l2h, don't touch Xen mappings */
-#define SHADOW2_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
-do { \
- int _i; \
- shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
- ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l2_pae_shadow \
- || (mfn_to_page(_sl2mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l2h_pae_shadow); \
- for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
- if ( (!(_xen)) \
- || ((mfn_to_page(_sl2mfn)->count_info & PGC_SH2_type_mask) \
- != PGC_SH2_l2h_pae_shadow) \
- || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \
- < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
- { \
- (_sl2e) = _sp + _i; \
- if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- increment_ptr_to_guest_entry(_gl2p); \
- } \
- unmap_shadow_page(_sp); \
-} while (0)
-
-#else
-
-/* 64-bit l2: touch all entries */
-#define SHADOW2_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
-do { \
- int _i; \
- shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
- ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l2_64_shadow); \
- for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
- { \
- (_sl2e) = _sp + _i; \
- if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- increment_ptr_to_guest_entry(_gl2p); \
- } \
- unmap_shadow_page(_sp); \
-} while (0)
-
-#endif /* different kinds of l2 */
-
-#if GUEST_PAGING_LEVELS == 3
-
-/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
-#define SHADOW2_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code) \
-do { \
- int _i; \
- for ( _i = 0; _i < 4; _i++ ) \
- { \
- if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- _sl3e++; \
- increment_ptr_to_guest_entry(_gl3p); \
- } \
-} while (0)
-
-/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
-#define SHADOW2_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
-do { \
- int _i, _j, _k, __done = 0; \
- ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l3_pae_shadow); \
- /* The subshadows are split, 64 on each page of the shadow */ \
- for ( _j = 0; _j < 2 && !__done; _j++ ) \
- { \
- void *_sp = sh2_map_domain_page(_sl3mfn); \
- for ( _i = 0; _i < 64; _i++ ) \
- { \
- /* Every second 32-byte region is a bookkeeping entry */ \
- _sl3e = (shadow_l3e_t *)(_sp + (64 * _i)); \
- if ( (sl3p_to_info(_sl3e))->refcount > 0 ) \
- SHADOW2_FOREACH_L3E_SUB(_sl3e, _gl3p, \
- ({ __done = (_done); __done; }), \
- _code); \
- else \
- for ( _k = 0 ; _k < 4 ; _k++ ) \
- increment_ptr_to_guest_entry(_gl3p); \
- if ( __done ) break; \
- } \
- sh2_unmap_domain_page(_sp); \
- _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1); \
- } \
-} while (0)
-
-#elif GUEST_PAGING_LEVELS == 4
-
-/* 64-bit l3: touch all entries */
-#define SHADOW2_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
-do { \
- int _i; \
- shadow_l3e_t *_sp = map_shadow_page((_sl3mfn)); \
- ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l3_64_shadow); \
- for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \
- { \
- (_sl3e) = _sp + _i; \
- if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- increment_ptr_to_guest_entry(_gl3p); \
- } \
- unmap_shadow_page(_sp); \
-} while (0)
-
-/* 64-bit l4: avoid Xen mappings */
-#define SHADOW2_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _xen, _code) \
-do { \
- int _i; \
- shadow_l4e_t *_sp = map_shadow_page((_sl4mfn)); \
- ASSERT((mfn_to_page(_sl4mfn)->count_info & PGC_SH2_type_mask) \
- == PGC_SH2_l4_64_shadow); \
- for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \
- { \
- if ( (!(_xen)) || is_guest_l4_slot(_i) ) \
- { \
- (_sl4e) = _sp + _i; \
- if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- } \
- increment_ptr_to_guest_entry(_gl4p); \
- } \
- unmap_shadow_page(_sp); \
-} while (0)
-
-#endif
-
-
-
-/**************************************************************************/
-/* Functions to install Xen mappings and linear mappings in shadow pages */
-
-static mfn_t sh2_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
-
-// XXX -- this function should probably be moved to shadow2-common.c, but that
-// probably wants to wait until the shadow types have been moved from
-// shadow2-types.h to shadow2-private.h
-//
-#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
-void sh2_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
-{
- struct domain *d = v->domain;
- shadow_l4e_t *sl4e;
-
- sl4e = sh2_map_domain_page(sl4mfn);
- ASSERT(sl4e != NULL);
- ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
-
- /* Copy the common Xen mappings from the idle domain */
- memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
-
- /* Install the per-domain mappings for this domain */
- sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
- shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
- __PAGE_HYPERVISOR);
-
- /* Linear mapping */
- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
- shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
- sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
- shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
-
- if ( shadow2_mode_translate(v->domain) )
- {
- /* install domain-specific P2M table */
- sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
- shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
- __PAGE_HYPERVISOR);
- }
-
- sh2_unmap_domain_page(sl4e);
-}
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
-// For 3-on-3 PV guests, we need to make sure the xen mappings are in
-// place, which means that we need to populate the l2h entry in the l3
-// table.
-
-void sh2_install_xen_entries_in_l2h(struct vcpu *v,
- mfn_t sl2hmfn)
-{
- struct domain *d = v->domain;
- shadow_l2e_t *sl2e;
- int i;
-
- sl2e = sh2_map_domain_page(sl2hmfn);
- ASSERT(sl2e != NULL);
- ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
-
- /* Copy the common Xen mappings from the idle domain */
- memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
- &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
- L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
-
- /* Install the per-domain mappings for this domain */
- for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
- sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- shadow_l2e_from_mfn(
- page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
- __PAGE_HYPERVISOR);
-
- /* We don't set up a linear mapping here because we can't until this
- * l2h is installed in an l3e. sh2_update_linear_entries() handles
- * the linear mappings when the l3 is loaded. */
-
- if ( shadow2_mode_translate(d) )
- {
- /* Install the domain-specific p2m table */
- l3_pgentry_t *p2m;
- ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
- p2m = sh2_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
- for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
- {
- sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
- shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
- __PAGE_HYPERVISOR);
- }
- sh2_unmap_domain_page(p2m);
- }
-
- sh2_unmap_domain_page(sl2e);
-}
-
-void sh2_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
-{
- shadow_l3e_t *sl3e;
- guest_l3e_t *gl3e = v->arch.guest_vtable;
- shadow_l3e_t new_sl3e;
- gfn_t l2gfn;
- mfn_t l2gmfn, l2smfn;
- int r;
-
- ASSERT(!shadow2_mode_external(v->domain));
- ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
- l2gfn = guest_l3e_get_gfn(gl3e[3]);
- l2gmfn = sh2_gfn_to_mfn(v->domain, gfn_x(l2gfn));
- l2smfn = get_shadow_status(v, l2gmfn, PGC_SH2_l2h_shadow);
- if ( !valid_mfn(l2smfn) )
- {
- l2smfn = sh2_make_shadow(v, l2gmfn, PGC_SH2_l2h_shadow);
- }
- l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
- ft_prefetch);
- sl3e = sh2_map_domain_page(sl3mfn);
- r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
- sh2_unmap_domain_page(sl3e);
-}
-#endif
-
-
-#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
-void sh2_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
-{
- struct domain *d = v->domain;
- shadow_l2e_t *sl2e;
- int i;
-
- sl2e = sh2_map_domain_page(sl2mfn);
- ASSERT(sl2e != NULL);
- ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
-
- /* Copy the common Xen mappings from the idle domain */
- memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
- &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
- L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
-
- /* Install the per-domain mappings for this domain */
- for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
- sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- shadow_l2e_from_mfn(
- page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
- __PAGE_HYPERVISOR);
-
- /* Linear mapping */
- sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
- shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
- sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
-
- if ( shadow2_mode_translate(d) )
- {
- /* install domain-specific P2M table */
- sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START)] =
- shadow_l2e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
- __PAGE_HYPERVISOR);
- }
-
- sh2_unmap_domain_page(sl2e);
-}
-#endif
-
-
-
-
-
-/**************************************************************************/
-/* Create a shadow of a given guest page.
- */
-static mfn_t
-sh2_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
-{
- mfn_t smfn = shadow2_alloc(v->domain, shadow_type, mfn_x(gmfn));
- SHADOW2_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
- mfn_x(gmfn), shadow_type, mfn_x(smfn));
-
- if ( shadow_type != PGC_SH2_guest_root_type )
- /* Lower-level shadow, not yet linked form a higher level */
- mfn_to_page(smfn)->up = 0;
-
- // Create the Xen mappings...
- if ( !shadow2_mode_external(v->domain) )
- {
- switch (shadow_type)
- {
-#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
- case PGC_SH2_l4_shadow:
- sh2_install_xen_entries_in_l4(v, gmfn, smfn); break;
-#endif
-#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
- case PGC_SH2_l3_shadow:
- sh2_install_xen_entries_in_l3(v, gmfn, smfn); break;
- case PGC_SH2_l2h_shadow:
- sh2_install_xen_entries_in_l2h(v, smfn); break;
-#endif
-#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
- case PGC_SH2_l2_shadow:
- sh2_install_xen_entries_in_l2(v, gmfn, smfn); break;
-#endif
- default: /* Do nothing */ break;
- }
- }
-
- shadow2_promote(v, gmfn, shadow_type);
- set_shadow2_status(v, gmfn, shadow_type, smfn);
-
- return smfn;
-}
-
-/* Make a splintered superpage shadow */
-static mfn_t
-make_fl1_shadow(struct vcpu *v, gfn_t gfn)
-{
- mfn_t smfn = shadow2_alloc(v->domain, PGC_SH2_fl1_shadow,
- (unsigned long) gfn_x(gfn));
-
- SHADOW2_DEBUG(MAKE_SHADOW, "(%" SH2_PRI_gfn ")=>%" SH2_PRI_mfn "\n",
- gfn_x(gfn), mfn_x(smfn));
-
- set_fl1_shadow_status(v, gfn, smfn);
- return smfn;
-}
-
-
-#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
-mfn_t
-sh2_make_monitor_table(struct vcpu *v)
-{
-
- ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
-
-#if CONFIG_PAGING_LEVELS == 4
- {
- struct domain *d = v->domain;
- mfn_t m4mfn;
- m4mfn = shadow2_alloc(d, PGC_SH2_monitor_table, 0);
- sh2_install_xen_entries_in_l4(v, m4mfn, m4mfn);
- /* Remember the level of this table */
- mfn_to_page(m4mfn)->shadow2_flags = 4;
-#if SHADOW_PAGING_LEVELS < 4
- // Install a monitor l3 table in slot 0 of the l4 table.
- // This is used for shadow linear maps.
- {
- mfn_t m3mfn;
- l4_pgentry_t *l4e;
- m3mfn = shadow2_alloc(d, PGC_SH2_monitor_table, 0);
- mfn_to_page(m3mfn)->shadow2_flags = 3;
- l4e = sh2_map_domain_page(m4mfn);
- l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
- sh2_unmap_domain_page(l4e);
- }
-#endif /* SHADOW_PAGING_LEVELS < 4 */
- return m4mfn;
- }
-
-#elif CONFIG_PAGING_LEVELS == 3
-
- {
- struct domain *d = v->domain;
- mfn_t m3mfn, m2mfn;
- l3_pgentry_t *l3e;
- l2_pgentry_t *l2e;
- int i;
-
- m3mfn = shadow2_alloc(d, PGC_SH2_monitor_table, 0);
- /* Remember the level of this table */
- mfn_to_page(m3mfn)->shadow2_flags = 3;
-
- // Install a monitor l2 table in slot 3 of the l3 table.
- // This is used for all Xen entries, including linear maps
- m2mfn = shadow2_alloc(d, PGC_SH2_monitor_table, 0);
- mfn_to_page(m2mfn)->shadow2_flags = 2;
- l3e = sh2_map_domain_page(m3mfn);
- l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
- sh2_install_xen_entries_in_l2h(v, m2mfn);
- /* Install the monitor's own linear map */
- l2e = sh2_map_domain_page(m2mfn);
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
- l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
- (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
- ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
- : l2e_empty();
- sh2_unmap_domain_page(l2e);
- sh2_unmap_domain_page(l3e);
-
- SHADOW2_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
- return m3mfn;
- }
-
-#elif CONFIG_PAGING_LEVELS == 2
-
- {
- struct domain *d = v->domain;
- mfn_t m2mfn;
- m2mfn = shadow2_alloc(d, PGC_SH2_monitor_table, 0);
- sh2_install_xen_entries_in_l2(v, m2mfn, m2mfn);
- /* Remember the level of this table */
- mfn_to_page(m2mfn)->shadow2_flags = 2;
- return m2mfn;
- }
-
-#else
-#error this should not happen
-#endif /* CONFIG_PAGING_LEVELS */
-}
-#endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
-
-/**************************************************************************/
-/* These functions also take a virtual address and return the level-N
- * shadow table mfn and entry, but they create the shadow pagetables if
- * they are needed. The "demand" argument is non-zero when handling
- * a demand fault (so we know what to do about accessed bits &c).
- * If the necessary tables are not present in the guest, they return NULL. */
-#if GUEST_PAGING_LEVELS >= 4
-static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
- walk_t *gw,
- mfn_t *sl4mfn)
-{
- /* There is always a shadow of the top level table. Get it. */
- *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
- /* Reading the top level table is always valid. */
- return sh2_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
-}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-
-#if GUEST_PAGING_LEVELS >= 3
-static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
- walk_t *gw,
- mfn_t *sl3mfn,
- fetch_type_t ft)
-{
-#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
- mfn_t sl4mfn;
- shadow_l4e_t *sl4e;
- if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
- /* Get the l4e */
- sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
- ASSERT(sl4e != NULL);
- if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
- {
- *sl3mfn = shadow_l4e_get_mfn(*sl4e);
- ASSERT(valid_mfn(*sl3mfn));
- }
- else
- {
- int r;
- shadow_l4e_t new_sl4e;
- /* No l3 shadow installed: find and install it. */
- *sl3mfn = get_shadow_status(v, gw->l3mfn, PGC_SH2_l3_shadow);
- if ( !valid_mfn(*sl3mfn) )
- {
- /* No l3 shadow of this page exists at all: make one. */
- *sl3mfn = sh2_make_shadow(v, gw->l3mfn, PGC_SH2_l3_shadow);
- }
- /* Install the new sl3 table in the sl4e */
- l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn,
- *sl3mfn, &new_sl4e, ft);
- r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
- ASSERT((r & SHADOW2_SET_FLUSH) == 0);
- }
- /* Now follow it down a level. Guaranteed to succeed. */
- return sh2_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
-#else /* PAE... */
- /* There is always a shadow of the top level table. Get it. */
- *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
- /* This next line is important: the shadow l3 table is in an 8k
- * shadow and we need to return the right mfn of the pair. This call
- * will set it for us as a side-effect. */
- (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
- ASSERT(v->arch.shadow_vtable);
- return ((shadow_l3e_t *)v->arch.shadow_vtable)
- + shadow_l3_table_offset(gw->va);
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-
-static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
- walk_t *gw,
- mfn_t *sl2mfn,
- fetch_type_t ft)
-{
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
- mfn_t sl3mfn = _mfn(INVALID_MFN);
- shadow_l3e_t *sl3e;
- if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
- /* Get the l3e */
- sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
- ASSERT(sl3e != NULL); /* Since we know guest PT is valid this far */
- if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
- {
- *sl2mfn = shadow_l3e_get_mfn(*sl3e);
- ASSERT(valid_mfn(*sl2mfn));
- }
- else
- {
- int r;
- shadow_l3e_t new_sl3e;
- /* No l2 shadow installed: find and install it. */
- *sl2mfn = get_shadow_status(v, gw->l2mfn, PGC_SH2_l2_shadow);
- if ( !valid_mfn(*sl2mfn) )
- {
- /* No l2 shadow of this page exists at all: make one. */
- *sl2mfn = sh2_make_shadow(v, gw->l2mfn, PGC_SH2_l2_shadow);
- }
- /* Install the new sl2 table in the sl3e */
- l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn,
- *sl2mfn, &new_sl3e, ft);
- r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
- ASSERT((r & SHADOW2_SET_FLUSH) == 0);
-#if GUEST_PAGING_LEVELS == 3
- /* Need to sync up the linear maps, as we are about to use them */
- ASSERT( r & SHADOW2_SET_L3PAE_RECOPY );
- sh2_pae_recopy(v->domain);
-#endif
- }
- /* Now follow it down a level. Guaranteed to succeed. */
- return sh2_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
-#else /* 32bit... */
- /* There is always a shadow of the top level table. Get it. */
- *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
- /* This next line is important: the guest l2 has a 16k
- * shadow, we need to return the right mfn of the four. This
- * call will set it for us as a side-effect. */
- (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
- /* Reading the top level table is always valid. */
- return sh2_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
-#endif
-}
-
-
-static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v,
- walk_t *gw,
- mfn_t *sl1mfn,
- fetch_type_t ft)
-{
- mfn_t sl2mfn;
- shadow_l2e_t *sl2e;
-
- /* Get the l2e */
- sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
- if ( sl2e == NULL ) return NULL;
- if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
- {
- *sl1mfn = shadow_l2e_get_mfn(*sl2e);
- ASSERT(valid_mfn(*sl1mfn));
- }
- else
- {
- shadow_l2e_t new_sl2e;
- int r, flags = guest_l2e_get_flags(*gw->l2e);
- /* No l1 shadow installed: find and install it. */
- if ( !(flags & _PAGE_PRESENT) )
- return NULL; /* No guest page. */
- if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
- {
- /* Splintering a superpage */
- gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
- *sl1mfn = get_fl1_shadow_status(v, l2gfn);
- if ( !valid_mfn(*sl1mfn) )
- {
- /* No fl1 shadow of this superpage exists at all: make one. */
- *sl1mfn = make_fl1_shadow(v, l2gfn);
- }
- }
- else
- {
- /* Shadowing an actual guest l1 table */
- if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
- *sl1mfn = get_shadow_status(v, gw->l1mfn, PGC_SH2_l1_shadow);
- if ( !valid_mfn(*sl1mfn) )
- {
- /* No l1 shadow of this page exists at all: make one. */
- *sl1mfn = sh2_make_shadow(v, gw->l1mfn, PGC_SH2_l1_shadow);
- }
- }
- /* Install the new sl1 table in the sl2e */
- l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn,
- *sl1mfn, &new_sl2e, ft);
- r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
- ASSERT((r & SHADOW2_SET_FLUSH) == 0);
- /* This next line is important: in 32-on-PAE and 32-on-64 modes,
- * the guest l1 table has an 8k shadow, and we need to return
- * the right mfn of the pair. This call will set it for us as a
- * side-effect. (In all other cases, it's a no-op and will be
- * compiled out.) */
- (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
- }
- /* Now follow it down a level. Guaranteed to succeed. */
- return sh2_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
-}
-
-
-
-/**************************************************************************/
-/* Destructors for shadow tables:
- * Unregister the shadow, decrement refcounts of any entries present in it,
- * and release the memory.
- *
- * N.B. These destructors do not clear the contents of the shadows.
- * This allows us to delay TLB shootdowns until the page is being reused.
- * See shadow2_alloc() and shadow2_free() for how this is handled.
- */
-
-#if GUEST_PAGING_LEVELS >= 4
-void sh2_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
-{
- shadow_l4e_t *sl4e;
- u32 t = mfn_to_page(smfn)->count_info & PGC_SH2_type_mask;
- mfn_t gmfn, sl4mfn;
- int xen_mappings;
-
- SHADOW2_DEBUG(DESTROY_SHADOW,
- "%s(%05lx)\n", __func__, mfn_x(smfn));
- ASSERT(t == PGC_SH2_l4_shadow);
-
- /* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
- delete_shadow2_status(v, gmfn, t, smfn);
- shadow2_demote(v, gmfn, t);
- /* Take this shadow off the list of root shadows */
- list_del_init(&mfn_to_page(smfn)->list);
-
- /* Decrement refcounts of all the old entries */
- xen_mappings = (!shadow2_mode_external(v->domain));
- sl4mfn = smfn;
- SHADOW2_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
- if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
- {
- sh2_put_ref(v, shadow_l4e_get_mfn(*sl4e),
- (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
- | ((unsigned long)sl4e & ~PAGE_MASK));
- }
- });
-
- /* Put the memory back in the pool */
- shadow2_free(v->domain, smfn);
-}
-#endif
-
-#if GUEST_PAGING_LEVELS >= 3
-void sh2_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
-{
- shadow_l3e_t *sl3e;
- u32 t = mfn_to_page(smfn)->count_info & PGC_SH2_type_mask;
- mfn_t gmfn, sl3mfn;
-
- SHADOW2_DEBUG(DESTROY_SHADOW,
- "%s(%05lx)\n", __func__, mfn_x(smfn));
- ASSERT(t == PGC_SH2_l3_shadow);
-
- /* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
- delete_shadow2_status(v, gmfn, t, smfn);
- shadow2_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 3
- /* Take this shadow off the list of root shadows */
- list_del_init(&mfn_to_page(smfn)->list);
-#endif
-
- /* Decrement refcounts of all the old entries */
- sl3mfn = smfn;
- SHADOW2_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
- if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
- sh2_put_ref(v, shadow_l3e_get_mfn(*sl3e),
- (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
- | ((unsigned long)sl3e & ~PAGE_MASK));
- });
-
- /* Put the memory back in the pool */
- shadow2_free(v->domain, smfn);
-}
-#endif
-
-
-#if GUEST_PAGING_LEVELS == 3
-static void sh2_destroy_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e)
-/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
-{
- int i;
- ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0);
- for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ )
- if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT )
- sh2_put_ref(v, shadow_l3e_get_mfn(sl3e[i]),
- maddr_from_mapped_domain_page(sl3e));
-}
-#endif
-
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh2_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
-/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
-{
- int i, j;
- struct pae_l3_bookkeeping *bk;
-
- ASSERT((mfn_to_page(smfn)->count_info & PGC_SH2_type_mask)
- == PGC_SH2_l3_pae_shadow);
- /* The subshadows are split, 64 on each page of the shadow */
- for ( i = 0; i < 2; i++ )
- {
- void *p = sh2_map_domain_page(_mfn(mfn_x(smfn) + i));
- for ( j = 0; j < 64; j++ )
- {
- /* Every second 32-byte region is a bookkeeping entry */
- bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
- if ( bk->pinned )
- sh2_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
- /* Check whether we've just freed the whole shadow */
- if ( (mfn_to_page(smfn)->count_info & PGC_SH2_count_mask) == 0 )
- {
- sh2_unmap_domain_page(p);
- return;
- }
- }
- sh2_unmap_domain_page(p);
- }
-}
-#endif
-
-void sh2_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
-{
- shadow_l2e_t *sl2e;
- u32 t = mfn_to_page(smfn)->count_info & PGC_SH2_type_mask;
- mfn_t gmfn, sl2mfn;
- int xen_mappings;
-
- SHADOW2_DEBUG(DESTROY_SHADOW,
- "%s(%05lx)\n", __func__, mfn_x(smfn));
- ASSERT(t == PGC_SH2_l2_shadow
- || t == PGC_SH2_l2h_pae_shadow);
-
- /* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
- delete_shadow2_status(v, gmfn, t, smfn);
- shadow2_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 2
- /* Take this shadow off the list of root shadows */
- list_del_init(&mfn_to_page(smfn)->list);
-#endif
-
- /* Decrement refcounts of all the old entries */
- sl2mfn = smfn;
- xen_mappings = (!shadow2_mode_external(v->domain) &&
- ((GUEST_PAGING_LEVELS == 2) ||
- ((GUEST_PAGING_LEVELS == 3) &&
- (t == PGC_SH2_l2h_pae_shadow))));
- SHADOW2_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
- if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
- sh2_put_ref(v, shadow_l2e_get_mfn(*sl2e),
- (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
- | ((unsigned long)sl2e & ~PAGE_MASK));
- });
-
- /* Put the memory back in the pool */
- shadow2_free(v->domain, smfn);
-}
-
-void sh2_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
-{
- struct domain *d = v->domain;
- shadow_l1e_t *sl1e;
- u32 t = mfn_to_page(smfn)->count_info & PGC_SH2_type_mask;
-
- SHADOW2_DEBUG(DESTROY_SHADOW,
- "%s(%05lx)\n", __func__, mfn_x(smfn));
- ASSERT(t == PGC_SH2_l1_shadow || t == PGC_SH2_fl1_shadow);
-
- /* Record that the guest page isn't shadowed any more (in this type) */
- if ( t == PGC_SH2_fl1_shadow )
- {
- gfn_t gfn = _gfn(mfn_to_page(smfn)->u.inuse.type_info);
- delete_fl1_shadow_status(v, gfn, smfn);
- }
- else
- {
- mfn_t gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
- delete_shadow2_status(v, gmfn, t, smfn);
- shadow2_demote(v, gmfn, t);
- }
-
- if ( shadow2_mode_refcounts(d) )
- {
- /* Decrement refcounts of all the old entries */
- mfn_t sl1mfn = smfn;
- SHADOW2_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
- if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT )
- shadow2_put_page_from_l1e(*sl1e, d);
- });
- }
-
- /* Put the memory back in the pool */
- shadow2_free(v->domain, smfn);
-}
-
-#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
-void sh2_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
-{
- struct domain *d = v->domain;
- ASSERT((mfn_to_page(mmfn)->count_info & PGC_SH2_type_mask)
- == PGC_SH2_monitor_table);
-
-#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
- /* Need to destroy the l3 monitor page in slot 0 too */
- {
- l4_pgentry_t *l4e = sh2_map_domain_page(mmfn);
- ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
- shadow2_free(d, _mfn(l4e_get_pfn(l4e[0])));
- sh2_unmap_domain_page(l4e);
- }
-#elif CONFIG_PAGING_LEVELS == 3
- /* Need to destroy the l2 monitor page in slot 4 too */
- {
- l3_pgentry_t *l3e = sh2_map_domain_page(mmfn);
- ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
- shadow2_free(d, _mfn(l3e_get_pfn(l3e[3])));
- sh2_unmap_domain_page(l3e);
- }
-#endif
-
- /* Put the memory back in the pool */
- shadow2_free(d, mmfn);
-}
-#endif
-
-/**************************************************************************/
-/* Functions to destroy non-Xen mappings in a pagetable hierarchy.
- * These are called from common code when we are running out of shadow
- * memory, and unpinning all the top-level shadows hasn't worked.
- *
- * This implementation is pretty crude and slow, but we hope that it won't
- * be called very often. */
-
-#if GUEST_PAGING_LEVELS == 2
-
-void sh2_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
-{
- shadow_l2e_t *sl2e;
- int xen_mappings = !shadow2_mode_external(v->domain);
- SHADOW2_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
- (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
- });
-}
-
-#elif GUEST_PAGING_LEVELS == 3
-
-void sh2_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
-/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
-{
- shadow_l3e_t *sl3e;
- SHADOW2_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
- if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
- mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
- if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH2_type_mask)
- == PGC_SH2_l2h_pae_shadow )
- {
- /* High l2: need to pick particular l2es to unhook */
- shadow_l2e_t *sl2e;
- SHADOW2_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
- (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
- });
- }
- else
- {
- /* Normal l2: can safely unhook the whole l3e */
- (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
- }
- }
- });
- /* We've changed PAE L3 entries: must sync up various copies of them */
- sh2_pae_recopy(v->domain);
-}
-
-#elif GUEST_PAGING_LEVELS == 4
-
-void sh2_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
-{
- shadow_l4e_t *sl4e;
- int xen_mappings = !shadow2_mode_external(v->domain);
- SHADOW2_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
- (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
- });
-}
-
-#endif
-
-/**************************************************************************/
-/* Internal translation functions.
- * These functions require a pointer to the shadow entry that will be updated.
- */
-
-/* These functions take a new guest entry, translate it to shadow and write
- * the shadow entry.
- *
- * They return the same bitmaps as the shadow_set_lXe() functions.
- */
-
-#if GUEST_PAGING_LEVELS >= 4
-static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
-{
- shadow_l4e_t new_sl4e;
- guest_l4e_t *new_gl4e = new_ge;
- shadow_l4e_t *sl4p = se;
- mfn_t sl3mfn = _mfn(INVALID_MFN);
- int result = 0;
-
- perfc_incrc(shadow2_validate_gl4e_calls);
-
- if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
- {
- gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
- mfn_t gl3mfn = vcpu_gfn_to_mfn(v, gl3gfn);
- if ( valid_mfn(gl3mfn) )
- sl3mfn = get_shadow_status(v, gl3mfn, PGC_SH2_l3_shadow);
- else
- result |= SHADOW2_SET_ERROR;
- }
- l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
- sl3mfn, &new_sl4e, ft_prefetch);
- result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
- return result;
-}
-#endif // GUEST_PAGING_LEVELS >= 4
-
-#if GUEST_PAGING_LEVELS >= 3
-static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
-{
- shadow_l3e_t new_sl3e;
- guest_l3e_t *new_gl3e = new_ge;
- shadow_l3e_t *sl3p = se;
- mfn_t sl2mfn = _mfn(INVALID_MFN);
- int result = 0;
-
- perfc_incrc(shadow2_validate_gl3e_calls);
-
- if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
- {
- gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
- mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
- if ( valid_mfn(gl2mfn) )
- sl2mfn = get_shadow_status(v, gl2mfn, PGC_SH2_l2_shadow);
- else
- result |= SHADOW2_SET_ERROR;
- }
- l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN),
- sl2mfn, &new_sl3e, ft_prefetch);
- result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
-
-#if GUEST_PAGING_LEVELS == 3
- /* We have changed a PAE l3 entry: need to sync up the possible copies
- * of it */
- if ( result & SHADOW2_SET_L3PAE_RECOPY )
- sh2_pae_recopy(v->domain);
-#endif
-
- return result;
-}
-#endif // GUEST_PAGING_LEVELS >= 3
-
-static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
-{
- shadow_l2e_t new_sl2e;
- guest_l2e_t *new_gl2e = new_ge;
- shadow_l2e_t *sl2p = se;
- mfn_t sl1mfn = _mfn(INVALID_MFN);
- int result = 0;
-
- perfc_incrc(shadow2_validate_gl2e_calls);
-
- if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
- {
- gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
- if ( guest_supports_superpages(v) &&
- (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
- {
- // superpage -- need to look up the shadow L1 which holds the
- // splitters...
- sl1mfn = get_fl1_shadow_status(v, gl1gfn);
-#if 0
- // XXX - it's possible that we want to do some kind of prefetch
- // for superpage fl1's here, but this is *not* on the demand path,
- // so we'll hold off trying that for now...
- //
- if ( !valid_mfn(sl1mfn) )
- sl1mfn = make_fl1_shadow(v, gl1gfn);
-#endif
- }
- else
- {
- mfn_t gl1mfn = vcpu_gfn_to_mfn(v, gl1gfn);
- if ( valid_mfn(gl1mfn) )
- sl1mfn = get_shadow_status(v, gl1mfn, PGC_SH2_l1_shadow);
- else
- result |= SHADOW2_SET_ERROR;
- }
- }
- l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
- sl1mfn, &new_sl2e, ft_prefetch);
- result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
-
- return result;
-}
-
-static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
-{
- shadow_l1e_t new_sl1e;
- guest_l1e_t *new_gl1e = new_ge;
- shadow_l1e_t *sl1p = se;
- gfn_t gfn;
- mfn_t mfn;
- int result = 0;
-
- perfc_incrc(shadow2_validate_gl1e_calls);
-
- gfn = guest_l1e_get_gfn(*new_gl1e);
- mfn = vcpu_gfn_to_mfn(v, gfn);
-
- l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e,
- /* mmio? */ !valid_mfn(mfn));
-
- result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
- return result;
-}
-
-
-/**************************************************************************/
-/* Functions which translate and install a the shadows of arbitrary guest
- * entries that we have just seen the guest write. */
-
-
-static inline int
-sh2_map_and_validate(struct vcpu *v, mfn_t gmfn,
- void *new_gp, u32 size, u32 sh_type,
- u32 (*shadow_index)(mfn_t *smfn, u32 idx),
- int (*validate_ge)(struct vcpu *v, void *ge,
- mfn_t smfn, void *se))
-/* Generic function for mapping and validating. */
-{
- mfn_t smfn, smfn2, map_mfn;
- shadow_l1e_t *sl1p;
- u32 shadow_idx, guest_idx;
- int result = 0;
-
- /* Align address and size to guest entry boundaries */
- size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
- new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
- size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
- ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
-
- /* Map the shadow page */
- smfn = get_shadow_status(v, gmfn, sh_type);
- ASSERT(valid_mfn(smfn)); /* Otherwise we would not have been called */
- guest_idx = guest_index(new_gp);
- map_mfn = smfn;
- shadow_idx = shadow_index(&map_mfn, guest_idx);
- sl1p = map_shadow_page(map_mfn);
-
- /* Validate one entry at a time */
- while ( size )
- {
- smfn2 = smfn;
- guest_idx = guest_index(new_gp);
- shadow_idx = shadow_index(&smfn2, guest_idx);
- if ( mfn_x(smfn2) != mfn_x(map_mfn) )
- {
- /* We have moved to another page of the shadow */
- map_mfn = smfn2;
- unmap_shadow_page(sl1p);
- sl1p = map_shadow_page(map_mfn);
- }
- result |= validate_ge(v,
- new_gp,
- map_mfn,
- &sl1p[shadow_idx]);
- size -= sizeof(guest_l1e_t);
- new_gp += sizeof(guest_l1e_t);
- }
- unmap_shadow_page(sl1p);
- return result;
-}
-
-
-int
-sh2_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
- void *new_gl4p, u32 size)
-{
-#if GUEST_PAGING_LEVELS >= 4
- return sh2_map_and_validate(v, gl4mfn, new_gl4p, size,
- PGC_SH2_l4_shadow,
- shadow_l4_index,
- validate_gl4e);
-#else // ! GUEST_PAGING_LEVELS >= 4
- SHADOW2_PRINTK("called in wrong paging mode!\n");
- BUG();
- return 0;
-#endif
-}
-
-int
-sh2_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
- void *new_gl3p, u32 size)
-{
-#if GUEST_PAGING_LEVELS >= 3
- return sh2_map_and_validate(v, gl3mfn, new_gl3p, size,
- PGC_SH2_l3_shadow,
- shadow_l3_index,
- validate_gl3e);
-#else // ! GUEST_PAGING_LEVELS >= 3
- SHADOW2_PRINTK("called in wrong paging mode!\n");
- BUG();
- return 0;
-#endif
-}
-
-int
-sh2_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
- void *new_gl2p, u32 size)
-{
- return sh2_map_and_validate(v, gl2mfn, new_gl2p, size,
- PGC_SH2_l2_shadow,
- shadow_l2_index,
- validate_gl2e);
-}
-
-int
-sh2_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
- void *new_gl2p, u32 size)
-{
-#if GUEST_PAGING_LEVELS == 3
- return sh2_map_and_validate(v, gl2mfn, new_gl2p, size,
- PGC_SH2_l2h_shadow,
- shadow_l2_index,
- validate_gl2e);
-#else /* Non-PAE guests don't have different kinds of l2 table */
- SHADOW2_PRINTK("called in wrong paging mode!\n");
- BUG();
- return 0;
-#endif
-}
-
-int
-sh2_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
- void *new_gl1p, u32 size)
-{
- return sh2_map_and_validate(v, gl1mfn, new_gl1p, size,
- PGC_SH2_l1_shadow,
- shadow_l1_index,
- validate_gl1e);
-}
-
-
-/**************************************************************************/
-/* Optimization: If we see two emulated writes of zeros to the same
- * page-table without another kind of page fault in between, we guess
- * that this is a batch of changes (for process destruction) and
- * unshadow the page so we don't take a pagefault on every entry. This
- * should also make finding writeable mappings of pagetables much
- * easier. */
-
-/* Look to see if this is the second emulated write in a row to this
- * page, and unshadow/unhook if it is */
-static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
-{
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_EARLY_UNSHADOW
- if ( v->arch.shadow2.last_emulated_mfn == mfn_x(gmfn) &&
- sh2_mfn_is_a_page_table(gmfn) )
- {
- u32 flags = mfn_to_page(gmfn)->shadow2_flags;
- mfn_t smfn;
- if ( !(flags & (SH2F_L2_32|SH2F_L3_PAE|SH2F_L4_64)) )
- {
- perfc_incrc(shadow2_early_unshadow);
- sh2_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
- return;
- }
- /* SH2F_unhooked_mappings is set to make sure we only unhook
- * once in a single batch of updates. It is reset when this
- * top-level page is loaded into CR3 again */
- if ( !(flags & SH2F_unhooked_mappings) )
- {
- perfc_incrc(shadow2_early_unshadow_top);
- mfn_to_page(gmfn)->shadow2_flags |= SH2F_unhooked_mappings;
- if ( flags & SH2F_L2_32 )
- {
- smfn = get_shadow_status(v, gmfn, PGC_SH2_l2_32_shadow);
- shadow2_unhook_mappings(v, smfn);
- }
- if ( flags & SH2F_L3_PAE )
- {
- smfn = get_shadow_status(v, gmfn, PGC_SH2_l3_pae_shadow);
- shadow2_unhook_mappings(v, smfn);
- }
- if ( flags & SH2F_L4_64 )
- {
- smfn = get_shadow_status(v, gmfn, PGC_SH2_l4_64_shadow);
- shadow2_unhook_mappings(v, smfn);
- }
- }
- }
- v->arch.shadow2.last_emulated_mfn = mfn_x(gmfn);
-#endif
-}
-
-/* Stop counting towards early unshadows, as we've seen a real page fault */
-static inline void reset_early_unshadow(struct vcpu *v)
-{
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_EARLY_UNSHADOW
- v->arch.shadow2.last_emulated_mfn = INVALID_MFN;
-#endif
-}
-
-
-
-/**************************************************************************/
-/* Entry points into the shadow code */
-
-/* Called from pagefault handler in Xen, and from the HVM trap handlers
- * for pagefaults. Returns 1 if this fault was an artefact of the
- * shadow code (and the guest should retry) or 0 if it is not (and the
- * fault should be handled elsewhere or passed to the guest). */
-
-static int sh2_page_fault(struct vcpu *v,
- unsigned long va,
- struct cpu_user_regs *regs)
-{
- struct domain *d = v->domain;
- walk_t gw;
- u32 accumulated_gflags;
- gfn_t gfn;
- mfn_t gmfn, sl1mfn=_mfn(0);
- shadow_l1e_t sl1e, *ptr_sl1e;
- paddr_t gpa;
- struct cpu_user_regs emul_regs;
- struct x86_emulate_ctxt emul_ctxt;
- int r, mmio;
- fetch_type_t ft = 0;
-
- //
- // XXX: Need to think about eventually mapping superpages directly in the
- // shadow (when possible), as opposed to splintering them into a
- // bunch of 4K maps.
- //
-
- SHADOW2_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
- v->domain->domain_id, v->vcpu_id, va, regs->error_code);
-
- shadow2_lock(d);
-
- shadow2_audit_tables(v);
-
- if ( guest_walk_tables(v, va, &gw, 1) != 0 )
- {
- SHADOW2_PRINTK("malformed guest pagetable!");
- print_gw(&gw);
- }
-
- sh2_audit_gw(v, &gw);
-
- // We do not look at the gw->l1e, as that will not exist for superpages.
- // Instead, we use the gw->eff_l1e...
- //
- // We need not check all the levels of the guest page table entries for
- // present vs not-present, as the eff_l1e will always be not present if
- // one of the higher level entries is not present.
- //
- if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
- {
- if ( hvm_guest(v) && !shadow2_vcpu_mode_translate(v) )
- {
- /* Not present in p2m map, means this is mmio */
- gpa = va;
- goto mmio;
- }
-
- perfc_incrc(shadow2_fault_bail_not_present);
- goto not_a_shadow_fault;
- }
-
- // All levels of the guest page table are now known to be present.
- accumulated_gflags = accumulate_guest_flags(&gw);
-
- // Check for attempts to access supervisor-only pages from user mode,
- // i.e. ring 3. Such errors are not caused or dealt with by the shadow
- // code.
- //
- if ( (regs->error_code & PFEC_user_mode) &&
- !(accumulated_gflags & _PAGE_USER) )
- {
- /* illegal user-mode access to supervisor-only page */
- perfc_incrc(shadow2_fault_bail_user_supervisor);
- goto not_a_shadow_fault;
- }
-
- // Was it a write fault?
- //
- if ( regs->error_code & PFEC_write_access )
- {
- if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
- {
- perfc_incrc(shadow2_fault_bail_ro_mapping);
- goto not_a_shadow_fault;
- }
- }
- else // must have been either an insn fetch or read fault
- {
- // Check for NX bit violations: attempts to execute code that is
- // marked "do not execute". Such errors are not caused or dealt with
- // by the shadow code.
- //
- if ( regs->error_code & PFEC_insn_fetch )
- {
- if ( accumulated_gflags & _PAGE_NX_BIT )
- {
- /* NX prevented this code fetch */
- perfc_incrc(shadow2_fault_bail_nx);
- goto not_a_shadow_fault;
- }
- }
- }
-
- /* Is this an MMIO access? */
- gfn = guest_l1e_get_gfn(gw.eff_l1e);
- mmio = ( hvm_guest(v)
- && shadow2_vcpu_mode_translate(v)
- && mmio_space(gfn_to_paddr(gfn)) );
-
- /* For MMIO, the shadow holds the *gfn*; for normal accesses, if holds
- * the equivalent mfn. */
- if ( mmio )
- gmfn = _mfn(gfn_x(gfn));
- else
- {
- gmfn = vcpu_gfn_to_mfn(v, gfn);
- if ( !valid_mfn(gmfn) )
- {
- perfc_incrc(shadow2_fault_bail_bad_gfn);
- SHADOW2_PRINTK("BAD gfn=%"SH2_PRI_gfn" gmfn=%"SH2_PRI_mfn"\n",
- gfn_x(gfn), mfn_x(gmfn));
- goto not_a_shadow_fault;
- }
- }
-
- /* Make sure there is enough free shadow memory to build a chain of
- * shadow tables: one SHADOW2_MAX_ORDER chunk will always be enough
- * to allocate all we need. (We never allocate a top-level shadow
- * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
- shadow2_prealloc(d, SHADOW2_MAX_ORDER);
-
- /* Acquire the shadow. This must happen before we figure out the rights
- * for the shadow entry, since we might promote a page here. */
- // XXX -- this code will need to change somewhat if/when the shadow code
- // can directly map superpages...
- ft = ((regs->error_code & PFEC_write_access) ?
- ft_demand_write : ft_demand_read);
- ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
- ASSERT(ptr_sl1e);
-
- /* Calculate the shadow entry */
- if ( ft == ft_demand_write )
- {
- if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) )
- {
- perfc_incrc(shadow2_fault_emulate_write);
- goto emulate;
- }
- }
- else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) )
- {
- perfc_incrc(shadow2_fault_emulate_read);
- goto emulate;
- }
-
- /* Quick sanity check: we never make an MMIO entry that's got the
- * _PAGE_PRESENT flag set in it. */
- ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT));
-
- r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
-
- if ( mmio )
- {
- gpa = guest_walk_to_gpa(&gw);
- goto mmio;
- }
-
-#if 0
- if ( !(r & SHADOW2_SET_CHANGED) )
- debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH2_PRI_pte
- ") did not change anything\n",
- __func__, gw.va, l1e_get_intpte(sl1e));
-#endif
-
- perfc_incrc(shadow2_fault_fixed);
- d->arch.shadow2.fault_count++;
- reset_early_unshadow(v);
-
- done:
- sh2_audit_gw(v, &gw);
- unmap_walk(v, &gw);
- SHADOW2_PRINTK("fixed\n");
- shadow2_audit_tables(v);
- shadow2_unlock(d);
- return EXCRET_fault_fixed;
-
- emulate:
-
- /* Take the register set we were called with */
- emul_regs = *regs;
- if ( hvm_guest(v) )
- {
- /* Add the guest's segment selectors, rip, rsp. rflags */
- hvm_store_cpu_guest_regs(v, &emul_regs, NULL);
- }
- emul_ctxt.regs = &emul_regs;
- emul_ctxt.cr2 = va;
- emul_ctxt.mode = hvm_guest(v) ? hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST;
-
- SHADOW2_PRINTK("emulate: eip=%#lx\n", emul_regs.eip);
-
- v->arch.shadow2.propagate_fault = 0;
- if ( x86_emulate_memop(&emul_ctxt, &shadow2_emulator_ops) )
- {
- SHADOW2_PRINTK("emulator failure, unshadowing mfn %#lx\n",
- mfn_x(gmfn));
- perfc_incrc(shadow2_fault_emulate_failed);
- /* If this is actually a page table, then we have a bug, and need
- * to support more operations in the emulator. More likely,
- * though, this is a hint that this page should not be shadowed. */
- shadow2_remove_all_shadows(v, gmfn);
- /* This means that actual missing operations will cause the
- * guest to loop on the same page fault. */
- goto done;
- }
- if ( v->arch.shadow2.propagate_fault )
- {
- /* Emulation triggered another page fault */
- goto not_a_shadow_fault;
- }
-
- /* Emulator has changed the user registers: write back */
- if ( hvm_guest(v) )
- {
- /* Write back the guest's segment selectors, rip, rsp. rflags */
- hvm_load_cpu_guest_regs(v, &emul_regs);
- /* And don't overwrite those in the caller's regs. */
- emul_regs.eip = regs->eip;
- emul_regs.cs = regs->cs;
- emul_regs.eflags = regs->eflags;
- emul_regs.esp = regs->esp;
- emul_regs.ss = regs->ss;
- emul_regs.es = regs->es;
- emul_regs.ds = regs->ds;
- emul_regs.fs = regs->fs;
- emul_regs.gs = regs->gs;
- }
- *regs = emul_regs;
-
- goto done;
-
- mmio:
- perfc_incrc(shadow2_fault_mmio);
- if ( !hvm_apic_support(d) && (gpa >= 0xFEC00000) )
- {
- /* Need to deal with these disabled-APIC accesses, as
- * handle_mmio() apparently does not currently do that. */
- /* TJD: What about it, then? For now, I'm turning this BUG()
- * into a domain_crash() since we don't want to kill Xen. */
- SHADOW2_ERROR("disabled-APIC access: not supported\n.");
- domain_crash(d);
- }
- sh2_audit_gw(v, &gw);
- unmap_walk(v, &gw);
- SHADOW2_PRINTK("mmio\n");
- shadow2_audit_tables(v);
- reset_early_unshadow(v);
- shadow2_unlock(d);
- sh2_log_mmio(v, gpa);
- handle_mmio(va, gpa);
- return EXCRET_fault_fixed;
-
- not_a_shadow_fault:
- sh2_audit_gw(v, &gw);
- unmap_walk(v, &gw);
- SHADOW2_PRINTK("not a shadow fault\n");
- shadow2_audit_tables(v);
- reset_early_unshadow(v);
- shadow2_unlock(d);
- return 0;
-}
-
-
-static int
-sh2_invlpg(struct vcpu *v, unsigned long va)
-/* Called when the guest requests an invlpg. Returns 1 if the invlpg
- * instruction should be issued on the hardware, or 0 if it's safe not
- * to do so. */
-{
- shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
-
- // XXX -- might be a good thing to prefetch the va into the shadow
-
- // no need to flush anything if there's no SL2...
- //
- if ( !ptr_sl2e )
- return 0;
-
- // If there's nothing shadowed for this particular sl2e, then
- // there is no need to do an invlpg, either...
- //
- if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
- return 0;
-
- // Check to see if the SL2 is a splintered superpage...
- // If so, then we'll need to flush the entire TLB (because that's
- // easier than invalidating all of the individual 4K pages).
- //
- if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
- PGC_SH2_type_mask) == PGC_SH2_fl1_shadow )
- {
- local_flush_tlb();
- return 0;
- }
-
- return 1;
-}
-
-static unsigned long
-sh2_gva_to_gfn(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
-{
- walk_t gw;
- gfn_t gfn;
-
- guest_walk_tables(v, va, &gw, 0);
- gfn = guest_walk_to_gfn(&gw);
- unmap_walk(v, &gw);
-
- return gfn_x(gfn);
-}
-
-
-static unsigned long
-sh2_gva_to_gpa(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
-{
- unsigned long gfn = sh2_gva_to_gfn(v, va);
- if ( gfn == INVALID_GFN )
- return 0;
- else
- return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
-}
-
-
-// XXX -- should this be in this file?
-// Or should it be moved to shadow2-common.c?
-//
-/* returns a lowmem machine address of the copied HVM L3 root table
- * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
- * otherwise blank out any entries with reserved bits in them. */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long
-hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
-{
- int i, f;
- int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
- l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
- memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
- for ( i = 0; i < 4; i++ )
- {
- f = l3e_get_flags(l3tab[i]);
- if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
- new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
- else
- new_l3e = l3e_empty();
- safe_write_entry(©[i], &new_l3e);
- }
- return __pa(copy);
-}
-#endif
-
-
-static inline void
-sh2_update_linear_entries(struct vcpu *v)
-/* Sync up all the linear mappings for this vcpu's pagetables */
-{
- struct domain *d = v->domain;
-
- /* Linear pagetables in PV guests
- * ------------------------------
- *
- * Guest linear pagetables, which map the guest pages, are at
- * LINEAR_PT_VIRT_START. Shadow linear pagetables, which map the
- * shadows, are at SH_LINEAR_PT_VIRT_START. Most of the time these
- * are set up at shadow creation time, but (of course!) the PAE case
- * is subtler. Normal linear mappings are made by having an entry
- * in the top-level table that points to itself (shadow linear) or
- * to the guest top-level table (guest linear). For PAE, to set up
- * a linear map requires us to copy the four top-level entries into
- * level-2 entries. That means that every time we change a PAE l3e,
- * we need to reflect the change into the copy.
- *
- * Linear pagetables in HVM guests
- * -------------------------------
- *
- * For HVM guests, the linear pagetables are installed in the monitor
- * tables (since we can't put them in the shadow). Shadow linear
- * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
- * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for
- * a linear pagetable of the monitor tables themselves. We have
- * the same issue of having to re-copy PAE l3 entries whevever we use
- * PAE shadows.
- *
- * Because HVM guests run on the same monitor tables regardless of the
- * shadow tables in use, the linear mapping of the shadow tables has to
- * be updated every time v->arch.shadow_table changes.
- */
-
- /* Don't try to update the monitor table if it doesn't exist */
- if ( shadow2_mode_external(d)
- && pagetable_get_pfn(v->arch.monitor_table) == 0 )
- return;
-
-#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 4)
-
- /* For PV, one l4e points at the guest l4, one points at the shadow
- * l4. No maintenance required.
- * For HVM, just need to update the l4e that points to the shadow l4. */
-
- if ( shadow2_mode_external(d) )
- {
- /* Use the linear map if we can; otherwise make a new mapping */
- if ( v == current )
- {
- __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] =
- l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
- __PAGE_HYPERVISOR);
- }
- else
- {
- l4_pgentry_t *ml4e;
- ml4e = sh2_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
- ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
- __PAGE_HYPERVISOR);
- sh2_unmap_domain_page(ml4e);
- }
- }
-
-#elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
-
- /* This case only exists in HVM. To give ourselves a linear map of the
- * shadows, we need to extend a PAE shadow to 4 levels. We do this by
- * having a monitor l3 in slot 0 of the monitor l4 table, and
- * copying the PAE l3 entries into it. Then, by having the monitor l4e
- * for shadow pagetables also point to the monitor l4, we can use it
- * to access the shadows. */
-
- if ( shadow2_mode_external(d) )
- {
- /* Install copies of the shadow l3es into the monitor l3 table.
- * The monitor l3 table is hooked into slot 0 of the monitor
- * l4 table, so we use l3 linear indices 0 to 3 */
- shadow_l3e_t *sl3e;
- l3_pgentry_t *ml3e;
- mfn_t l3mfn;
- int i;
-
- /* Use linear mappings if we can; otherwise make new mappings */
- if ( v == current )
- {
- ml3e = __linear_l3_table;
- l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
-#if GUEST_PAGING_LEVELS == 2
- /* Shadow l3 tables are made up by update_cr3 */
- sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
- sl3e = v->arch.shadow_vtable;
-#endif
- }
- else
- {
- l4_pgentry_t *ml4e;
- ml4e = sh2_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
- ASSERT(l4e_get_flags(ml4e[0]) & _PAGE_PRESENT);
- l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
- ml3e = sh2_map_domain_page(l3mfn);
- sh2_unmap_domain_page(ml4e);
-#if GUEST_PAGING_LEVELS == 2
- /* Shadow l3 tables are made up by update_cr3 */
- sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
- sl3e = sh2_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
-#endif
- }
-
- for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
- {
- ml3e[i] =
- (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT)
- ? l3e_from_pfn(mfn_x(shadow_l3e_get_mfn(sl3e[i])),
- __PAGE_HYPERVISOR)
- : l3e_empty();
- }
-
- if ( v != current )
- {
- sh2_unmap_domain_page(ml3e);
-#if GUEST_PAGING_LEVELS != 2
- sh2_unmap_domain_page(sl3e);
-#endif
- }
- }
-
-#elif CONFIG_PAGING_LEVELS == 3
-
- /* PV: need to copy the guest's l3 entries into the guest-linear-map l2
- * entries in the shadow, and the shadow's l3 entries into the
- * shadow-linear-map l2 entries in the shadow. This is safe to do
- * because Xen does not let guests share high-slot l2 tables between l3s,
- * so we know we're not treading on anyone's toes.
- *
- * HVM: need to copy the shadow's l3 entries into the
- * shadow-linear-map l2 entries in the monitor table. This is safe
- * because we have one monitor table for each vcpu. The monitor's
- * own l3es don't need to be copied because they never change.
- * XXX That might change if we start stuffing things into the rest
- * of the monitor's virtual address space.
- */
- {
- l2_pgentry_t *l2e, new_l2e;
- shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
- int i;
-
-#if GUEST_PAGING_LEVELS == 2
- /* Shadow l3 tables were built by update_cr3 */
- if ( shadow2_mode_external(d) )
- shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
- else
- BUG(); /* PV 2-on-3 is not supported yet */
-
-#else /* GUEST_PAGING_LEVELS == 3 */
-
- /* Use local vcpu's mappings if we can; otherwise make new mappings */
- if ( v == current )
- {
- shadow_l3e = v->arch.shadow_vtable;
- if ( !shadow2_mode_external(d) )
- guest_l3e = v->arch.guest_vtable;
- }
- else
- {
- mfn_t smfn;
- int idx;
-
- /* Map the shadow l3 */
- smfn = pagetable_get_mfn(v->arch.shadow_table);
- idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
- shadow_l3e = sh2_map_domain_page(smfn);
- shadow_l3e += idx;
- if ( !shadow2_mode_external(d) )
- {
- /* Also the guest l3 */
- mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table);
- guest_l3e = sh2_map_domain_page(gmfn);
- guest_l3e += guest_index(v->arch.guest_vtable);
- }
- }
-#endif /* GUEST_PAGING_LEVELS */
-
- /* Choose where to write the entries, using linear maps if possible */
- if ( v == current && shadow2_mode_external(d) )
- {
- /* From the monitor tables, it's safe to use linear maps to update
- * monitor l2s */
- l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
- }
- else if ( shadow2_mode_external(d) )
- {
- /* Map the monitor table's high l2 */
- l3_pgentry_t *l3e;
- l3e = sh2_map_domain_page(
- pagetable_get_mfn(v->arch.monitor_table));
- ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
- l2e = sh2_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
- sh2_unmap_domain_page(l3e);
- }
- else
- {
- /* Map the shadow table's high l2 */
- ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
- l2e = sh2_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
- }
-
-
- if ( !shadow2_mode_external(d) )
- {
- /* Write linear mapping of guest. */
- for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
- {
- new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
- ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
- __PAGE_HYPERVISOR)
- : l2e_empty();
- safe_write_entry(
- &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
- &new_l2e);
- }
- }
-
- /* Write linear mapping of shadow. */
- for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
- {
- new_l2e = (shadow_l3e_get_flags(shadow_l3e[i]) & _PAGE_PRESENT)
- ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(shadow_l3e[i])),
- __PAGE_HYPERVISOR)
- : l2e_empty();
- safe_write_entry(
- &l2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i],
- &new_l2e);
- }
-
- if ( v != current || !shadow2_mode_external(d) )
- sh2_unmap_domain_page(l2e);
-
-#if GUEST_PAGING_LEVELS == 3
- if ( v != current)
- {
- sh2_unmap_domain_page(shadow_l3e);
- if ( !shadow2_mode_external(d) )
- sh2_unmap_domain_page(guest_l3e);
- }
-#endif
- }
-
-#elif CONFIG_PAGING_LEVELS == 2
-
- /* For PV, one l2e points at the guest l2, one points at the shadow
- * l2. No maintenance required.
- * For HVM, just need to update the l2e that points to the shadow l2. */
-
- if ( shadow2_mode_external(d) )
- {
- /* Use the linear map if we can; otherwise make a new mapping */
- if ( v == current )
- {
- __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
- __PAGE_HYPERVISOR);
- }
- else
- {
- l2_pgentry_t *ml2e;
- ml2e = sh2_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
- ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
- __PAGE_HYPERVISOR);
- sh2_unmap_domain_page(ml2e);
- }
- }
-
-#else
-#error this should not happen
-#endif
-}
-
-
-// XXX -- should this be in this file?
-// Or should it be moved to shadow2-common.c?
-//
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh2_pae_recopy(struct domain *d)
-/* Called whenever we write to the l3 entries of a PAE pagetable which
- * is currently in use. Each vcpu that is using the table needs to
- * resync its copies of the l3s in linear maps and any low-memory
- * copies it might have made for fitting into 32bit CR3.
- * Since linear maps are also resynced when we change CR3, we don't
- * need to worry about changes to PAE l3es that are not currently in use.*/
-{
- struct vcpu *v;
- cpumask_t flush_mask = CPU_MASK_NONE;
- ASSERT(shadow2_lock_is_acquired(d));
-
- for_each_vcpu(d, v)
- {
- if ( !v->arch.shadow2.pae_flip_pending )
- continue;
-
- cpu_set(v->processor, flush_mask);
-
- SHADOW2_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
-
- /* This vcpu has a copy in its linear maps */
- sh2_update_linear_entries(v);
- if ( hvm_guest(v) )
- {
- /* This vcpu has a copy in its HVM PAE l3 */
- v->arch.hvm_vcpu.hw_cr3 =
- hvm_pae_copy_root(v, v->arch.shadow_vtable,
- !shadow2_vcpu_mode_translate(v));
- }
-#if CONFIG_PAGING_LEVELS == 3
- else
- {
- /* This vcpu might have copied the l3 to below 4GB */
- if ( v->arch.cr3 >> PAGE_SHIFT
- != pagetable_get_pfn(v->arch.shadow_table) )
- {
- /* Recopy to where that copy is. */
- int i;
- l3_pgentry_t *dst, *src;
- dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
- src = v->arch.shadow_vtable;
- for ( i = 0 ; i < 4 ; i++ )
- safe_write_entry(dst + i, src + i);
- }
- }
-#endif
- v->arch.shadow2.pae_flip_pending = 0;
- }
-
- flush_tlb_mask(flush_mask);
-}
-#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
-
-
-/* removes:
- * vcpu->arch.guest_vtable
- * vcpu->arch.shadow_table
- * vcpu->arch.shadow_vtable
- * Does all appropriate management/bookkeeping/refcounting/etc...
- */
-static void
-sh2_detach_old_tables(struct vcpu *v)
-{
- mfn_t smfn;
-
- ////
- //// vcpu->arch.guest_vtable
- ////
- if ( (shadow2_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
- v->arch.guest_vtable )
- {
- // Q: why does this need to use (un)map_domain_page_*global* ?
- sh2_unmap_domain_page_global(v->arch.guest_vtable);
- v->arch.guest_vtable = NULL;
- }
-
- ////
- //// vcpu->arch.shadow_table
- ////
- smfn = pagetable_get_mfn(v->arch.shadow_table);
- if ( mfn_x(smfn) )
- {
- ASSERT(v->arch.shadow_vtable);
-
-#if GUEST_PAGING_LEVELS == 3
- // PAE guests do not (necessarily) use an entire page for their
- // 4-entry L3s, so we have to deal with them specially.
- //
- sh2_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
-#else
- sh2_put_ref(v, smfn, 0);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- {
- struct pae_l3_bookkeeping *info =
- sl3p_to_info(v->arch.shadow_vtable);
- ASSERT(test_bit(v->vcpu_id, &info->vcpus));
- clear_bit(v->vcpu_id, &info->vcpus);
- }
-#endif
- v->arch.shadow_table = pagetable_null();
- }
-
- ////
- //// vcpu->arch.shadow_vtable
- ////
- if ( (shadow2_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
- v->arch.shadow_vtable )
- {
- // Q: why does this need to use (un)map_domain_page_*global* ?
- //
- sh2_unmap_domain_page_global(v->arch.shadow_vtable);
- v->arch.shadow_vtable = NULL;
- }
-}
-
-static void
-sh2_update_cr3(struct vcpu *v)
-/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
- * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
- * if appropriate).
- * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
- */
-{
- struct domain *d = v->domain;
- mfn_t gmfn, smfn;
-#if GUEST_PAGING_LEVELS == 3
- u32 guest_idx=0;
-#endif
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
- ASSERT(v->arch.shadow2.mode);
-
- ////
- //// vcpu->arch.guest_table is already set
- ////
-
-#ifndef NDEBUG
- /* Double-check that the HVM code has sent us a sane guest_table */
- if ( hvm_guest(v) )
- {
- gfn_t gfn;
-
- ASSERT(shadow2_mode_external(d));
-
- // Is paging enabled on this vcpu?
- if ( shadow2_vcpu_mode_translate(v) )
- {
- gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3)));
- gmfn = vcpu_gfn_to_mfn(v, gfn);
- ASSERT(valid_mfn(gmfn));
- ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
- }
- else
- {
- /* Paging disabled: guest_table points at (part of) p2m */
-#if SHADOW_PAGING_LEVELS != 3 /* in 3-on-4, guest-table is in slot 0 of p2m */
- /* For everything else, they sould be the same */
- ASSERT(v->arch.guest_table.pfn == d->arch.phys_table.pfn);
-#endif
- }
- }
-#endif
-
- SHADOW2_PRINTK("d=%u v=%u guest_table=%05lx\n",
- d->domain_id, v->vcpu_id,
- (unsigned long)pagetable_get_pfn(v->arch.guest_table));
-
-#if GUEST_PAGING_LEVELS == 4
- if ( !(v->arch.flags & TF_kernel_mode) )
- gmfn = pagetable_get_mfn(v->arch.guest_table_user);
- else
-#endif
- gmfn = pagetable_get_mfn(v->arch.guest_table);
-
- sh2_detach_old_tables(v);
-
- if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
- {
- ASSERT(v->arch.cr3 == 0);
- return;
- }
-
- ////
- //// vcpu->arch.guest_vtable
- ////
- if ( shadow2_mode_external(d) )
- {
-#if GUEST_PAGING_LEVELS == 3
- if ( shadow2_vcpu_mode_translate(v) )
- /* Paging enabled: find where in the page the l3 table is */
- guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
- else
- /* Paging disabled: l3 is at the start of a page (in the p2m) */
- guest_idx = 0;
-
- // Ignore the low 2 bits of guest_idx -- they are really just
- // cache control.
- guest_idx &= ~3;
- // XXX - why does this need a global map?
- v->arch.guest_vtable =
- (guest_l3e_t *)sh2_map_domain_page_global(gmfn) + guest_idx;
-#else
- // XXX - why does this need a global map?
- v->arch.guest_vtable = sh2_map_domain_page_global(gmfn);
-#endif
- }
- else
- {
-#ifdef __x86_64__
- v->arch.guest_vtable = __linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
- // XXX - why does this need a global map?
- v->arch.guest_vtable = sh2_map_domain_page_global(gmfn);
-#else
- v->arch.guest_vtable = __linear_l2_table;
-#endif
- }
-
-#if 0
- printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
- __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable);
-#endif
-
- ////
- //// vcpu->arch.shadow_table
- ////
- smfn = get_shadow_status(v, gmfn, PGC_SH2_guest_root_type);
- if ( valid_mfn(smfn) )
- {
- /* Pull this root shadow to the front of the list of roots. */
- list_del(&mfn_to_page(smfn)->list);
- list_add(&mfn_to_page(smfn)->list, &d->arch.shadow2.toplevel_shadows);
- }
- else
- {
- /* This guest MFN is a pagetable. Must revoke write access. */
- if ( shadow2_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0)
- != 0 )
- flush_tlb_mask(d->domain_dirty_cpumask);
- /* Make sure there's enough free shadow memory. */
- shadow2_prealloc(d, SHADOW2_MAX_ORDER);
- /* Shadow the page. */
- smfn = sh2_make_shadow(v, gmfn, PGC_SH2_guest_root_type);
- list_add(&mfn_to_page(smfn)->list, &d->arch.shadow2.toplevel_shadows);
- }
- ASSERT(valid_mfn(smfn));
- v->arch.shadow_table = pagetable_from_mfn(smfn);
-
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_EARLY_UNSHADOW
- /* Once again OK to unhook entries from this table if we see fork/exit */
- ASSERT(sh2_mfn_is_a_page_table(gmfn));
- mfn_to_page(gmfn)->shadow2_flags &= ~SH2F_unhooked_mappings;
-#endif
-
-
- ////
- //// vcpu->arch.shadow_vtable
- ////
- if ( shadow2_mode_external(d) )
- {
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- mfn_t adjusted_smfn = smfn;
- u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
- // Q: why does this need to use (un)map_domain_page_*global* ?
- v->arch.shadow_vtable =
- (shadow_l3e_t *)sh2_map_domain_page_global(adjusted_smfn) +
- shadow_idx;
-#else
- // Q: why does this need to use (un)map_domain_page_*global* ?
- v->arch.shadow_vtable = sh2_map_domain_page_global(smfn);
-#endif
- }
- else
- {
-#if SHADOW_PAGING_LEVELS == 4
- v->arch.shadow_vtable = __sh2_linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
- // XXX - why does this need a global map?
- v->arch.shadow_vtable = sh2_map_domain_page_global(smfn);
-#else
- v->arch.shadow_vtable = __sh2_linear_l2_table;
-#endif
- }
-
- ////
- //// Take a ref to the new shadow table, and pin it.
- ////
- //
- // This ref is logically "held" by v->arch.shadow_table entry itself.
- // Release the old ref.
- //
-#if GUEST_PAGING_LEVELS == 3
- // PAE guests do not (necessarily) use an entire page for their
- // 4-entry L3s, so we have to deal with them specially.
- //
- // XXX - might want to revisit this if/when we do multiple compilation for
- // HVM-vs-PV guests, as PAE PV guests could get away without doing
- // subshadows.
- //
- sh2_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
- sh2_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
-#else
- sh2_get_ref(smfn, 0);
- sh2_pin(smfn);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- // PAE 3-on-3 shadows have to keep track of which vcpu's are using
- // which l3 subshadow, in order handle the SHADOW2_SET_L3PAE_RECOPY
- // case from validate_gl3e(). Search for SHADOW2_SET_L3PAE_RECOPY
- // in the code for more info.
- //
- {
- struct pae_l3_bookkeeping *info =
- sl3p_to_info(v->arch.shadow_vtable);
- ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
- set_bit(v->vcpu_id, &info->vcpus);
- }
-#endif
-
- debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
- __func__, gmfn, smfn);
-
- ///
- /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
- ///
- if ( shadow2_mode_external(d) )
- {
- ASSERT(hvm_guest(v));
- make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
-
-#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
-#if SHADOW_PAGING_LEVELS != 3
-#error unexpected combination of GUEST and SHADOW paging levels
-#endif
- /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
- {
- mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
- int i;
-
- ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
- virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
- for (i = 0; i < 4; i++)
- {
- v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
- shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
- }
- }
-#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
- /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
- * If paging is disabled, clear l3e reserved bits; otherwise
- * remove entries that have reserved bits set. */
- v->arch.hvm_vcpu.hw_cr3 =
- hvm_pae_copy_root(v, v->arch.shadow_vtable,
- !shadow2_vcpu_mode_translate(v));
-#else
- /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
- v->arch.hvm_vcpu.hw_cr3 =
- pagetable_get_paddr(v->arch.shadow_table);
-#endif
- }
- else // not shadow2_mode_external...
- {
- /* We don't support PV except guest == shadow == config levels */
- BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
- make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
- }
-
- /* Fix up the linear pagetable mappings */
- sh2_update_linear_entries(v);
-}
-
-
-/**************************************************************************/
-/* Functions to revoke guest rights */
-
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_WRITABLE_HEURISTIC
-static int sh2_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
-/* Look up this vaddr in the current shadow and see if it's a writeable
- * mapping of this gmfn. If so, remove it. Returns 1 if it worked. */
-{
- shadow_l1e_t sl1e, *sl1p;
- shadow_l2e_t *sl2p;
-#if GUEST_PAGING_LEVELS >= 3
- shadow_l3e_t *sl3p;
-#if GUEST_PAGING_LEVELS >= 4
- shadow_l4e_t *sl4p;
-#endif
-#endif
- mfn_t sl1mfn;
-
-
- /* Carefully look in the shadow linear map for the l1e we expect */
- if ( v->arch.shadow_vtable == NULL ) return 0;
-#if GUEST_PAGING_LEVELS >= 4
- sl4p = sh2_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
- if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
- return 0;
- sl3p = sh2_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
- if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
- return 0;
-#elif GUEST_PAGING_LEVELS == 3
- sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable)
- + shadow_l3_linear_offset(vaddr);
- if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
- return 0;
-#endif
- sl2p = sh2_linear_l2_table(v) + shadow_l2_linear_offset(vaddr);
- if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) )
- return 0;
- sl1p = sh2_linear_l1_table(v) + shadow_l1_linear_offset(vaddr);
- sl1e = *sl1p;
- if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
- != (_PAGE_PRESENT|_PAGE_RW))
- || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
- return 0;
-
- /* Found it! Need to remove its write permissions. */
- sl1mfn = shadow_l2e_get_mfn(*sl2p);
- sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
- shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
- return 1;
-}
-#endif
-
-int sh2_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
-/* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
-{
- shadow_l1e_t *sl1e;
- int done = 0;
- int flags;
-
- SHADOW2_FOREACH_L1E(sl1mfn, sl1e, 0, done,
- {
- flags = shadow_l1e_get_flags(*sl1e);
- if ( (flags & _PAGE_PRESENT)
- && (flags & _PAGE_RW)
- && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
- {
- shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
- if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
- & PGT_count_mask) == 0 )
- /* This breaks us cleanly out of the FOREACH macro */
- done = 1;
- }
- });
- return done;
-}
-
-
-int sh2_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
-/* Excises all mappings to guest frame from this shadow l1 table */
-{
- shadow_l1e_t *sl1e;
- int done = 0;
- int flags;
-
- SHADOW2_FOREACH_L1E(sl1mfn, sl1e, 0, done,
- {
- flags = shadow_l1e_get_flags(*sl1e);
- if ( (flags & _PAGE_PRESENT)
- && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
- {
- shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
- if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
- /* This breaks us cleanly out of the FOREACH macro */
- done = 1;
- }
- });
- return done;
-}
-
-/**************************************************************************/
-/* Functions to excise all pointers to shadows from higher-level shadows. */
-
-void sh2_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
-/* Blank out a single shadow entry */
-{
- switch (mfn_to_page(smfn)->count_info & PGC_SH2_type_mask)
- {
- case PGC_SH2_l1_shadow:
- shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
- case PGC_SH2_l2_shadow:
-#if GUEST_PAGING_LEVELS == 3
- case PGC_SH2_l2h_shadow:
-#endif
- shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 3
- case PGC_SH2_l3_shadow:
- shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 4
- case PGC_SH2_l4_shadow:
- shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
-#endif
-#endif
- default: BUG(); /* Called with the wrong kind of shadow. */
- }
-}
-
-int sh2_remove_l1_shadow(struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn)
-/* Remove all mappings of this l1 shadow from this l2 shadow */
-{
- shadow_l2e_t *sl2e;
- int done = 0;
- int flags;
-#if GUEST_PAGING_LEVELS != 4
- int xen_mappings = !shadow2_mode_external(v->domain);
-#endif
-
- SHADOW2_FOREACH_L2E(sl2mfn, sl2e, 0, done, xen_mappings,
- {
- flags = shadow_l2e_get_flags(*sl2e);
- if ( (flags & _PAGE_PRESENT)
- && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
- {
- shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
- if ( (mfn_to_page(sl1mfn)->count_info & PGC_SH2_type_mask) == 0 )
- /* This breaks us cleanly out of the FOREACH macro */
- done = 1;
- }
- });
- return done;
-}
-
-#if GUEST_PAGING_LEVELS >= 3
-int sh2_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
-/* Remove all mappings of this l2 shadow from this l3 shadow */
-{
- shadow_l3e_t *sl3e;
- int done = 0;
- int flags;
-
- SHADOW2_FOREACH_L3E(sl3mfn, sl3e, 0, done,
- {
- flags = shadow_l3e_get_flags(*sl3e);
- if ( (flags & _PAGE_PRESENT)
- && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
- {
- shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
- if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH2_type_mask) == 0 )
- /* This breaks us cleanly out of the FOREACH macro */
- done = 1;
- }
- });
- return done;
-}
-
-#if GUEST_PAGING_LEVELS >= 4
-int sh2_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
-/* Remove all mappings of this l3 shadow from this l4 shadow */
-{
- shadow_l4e_t *sl4e;
- int done = 0;
- int flags, xen_mappings = !shadow2_mode_external(v->domain);
-
- SHADOW2_FOREACH_L4E(sl4mfn, sl4e, 0, done, xen_mappings,
- {
- flags = shadow_l4e_get_flags(*sl4e);
- if ( (flags & _PAGE_PRESENT)
- && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
- {
- shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
- if ( (mfn_to_page(sl3mfn)->count_info & PGC_SH2_type_mask) == 0 )
- /* This breaks us cleanly out of the FOREACH macro */
- done = 1;
- }
- });
- return done;
-}
-#endif /* 64bit guest */
-#endif /* PAE guest */
-
-/**************************************************************************/
-/* Handling HVM guest writes to pagetables */
-
-/* Check that the user is allowed to perform this write.
- * Returns a mapped pointer to write to, and the mfn it's on,
- * or NULL for error. */
-static inline void * emulate_map_dest(struct vcpu *v,
- unsigned long vaddr,
- struct x86_emulate_ctxt *ctxt,
- mfn_t *mfnp)
-{
- walk_t gw;
- u32 flags;
- gfn_t gfn;
- mfn_t mfn;
-
- guest_walk_tables(v, vaddr, &gw, 1);
- flags = accumulate_guest_flags(&gw);
- gfn = guest_l1e_get_gfn(gw.eff_l1e);
- mfn = vcpu_gfn_to_mfn(v, gfn);
- sh2_audit_gw(v, &gw);
- unmap_walk(v, &gw);
-
- if ( !(flags & _PAGE_PRESENT)
- || !(flags & _PAGE_RW)
- || (!(flags & _PAGE_USER) && ring_3(ctxt->regs)) )
- {
- /* This write would have faulted even on bare metal */
- v->arch.shadow2.propagate_fault = 1;
- return NULL;
- }
-
- if ( !valid_mfn(mfn) )
- {
- /* Attempted a write to a bad gfn. This should never happen:
- * after all, we're here because this write is to a page table. */
- BUG();
- }
-
- ASSERT(sh2_mfn_is_a_page_table(mfn));
- *mfnp = mfn;
- return sh2_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
-}
-
-int
-sh2_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
- u32 bytes, struct x86_emulate_ctxt *ctxt)
-{
- ASSERT(shadow2_lock_is_acquired(v->domain));
- while ( bytes > 0 )
- {
- mfn_t mfn;
- int bytes_on_page;
- void *addr;
-
- bytes_on_page = PAGE_SIZE - (vaddr & ~PAGE_MASK);
- if ( bytes_on_page > bytes )
- bytes_on_page = bytes;
-
- if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
- return X86EMUL_PROPAGATE_FAULT;
- memcpy(addr, src, bytes_on_page);
- shadow2_validate_guest_pt_write(v, mfn, addr, bytes_on_page);
- bytes -= bytes_on_page;
- /* If we are writing zeros to this page, might want to unshadow */
- if ( *(u8 *)addr == 0 )
- check_for_early_unshadow(v, mfn);
- sh2_unmap_domain_page(addr);
- }
- shadow2_audit_tables(v);
- return X86EMUL_CONTINUE;
-}
-
-int
-sh2_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
- unsigned long old, unsigned long new,
- unsigned int bytes, struct x86_emulate_ctxt *ctxt)
-{
- mfn_t mfn;
- void *addr;
- unsigned long prev;
- int rv = X86EMUL_CONTINUE;
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
- ASSERT(bytes <= sizeof (unsigned long));
-
- if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
- return X86EMUL_PROPAGATE_FAULT;
-
- switch (bytes)
- {
- case 1: prev = cmpxchg(((u8 *)addr), old, new); break;
- case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
- case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
- case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
- default:
- SHADOW2_PRINTK("cmpxchg of size %i is not supported\n", bytes);
- prev = ~old;
- }
-
- if ( (prev == old) )
- shadow2_validate_guest_pt_write(v, mfn, addr, bytes);
- else
- rv = X86EMUL_CMPXCHG_FAILED;
-
- SHADOW2_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
- " wanted %#lx now %#lx bytes %u\n",
- vaddr, prev, old, new, *(unsigned long *)addr, bytes);
-
- /* If we are writing zeros to this page, might want to unshadow */
- if ( *(u8 *)addr == 0 )
- check_for_early_unshadow(v, mfn);
-
- sh2_unmap_domain_page(addr);
- shadow2_audit_tables(v);
- check_for_early_unshadow(v, mfn);
- return rv;
-}
-
-int
-sh2_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr,
- unsigned long old_lo, unsigned long old_hi,
- unsigned long new_lo, unsigned long new_hi,
- struct x86_emulate_ctxt *ctxt)
-{
- mfn_t mfn;
- void *addr;
- u64 old, new, prev;
- int rv = X86EMUL_CONTINUE;
-
- ASSERT(shadow2_lock_is_acquired(v->domain));
-
- if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
- return X86EMUL_PROPAGATE_FAULT;
-
- old = (((u64) old_hi) << 32) | (u64) old_lo;
- new = (((u64) new_hi) << 32) | (u64) new_lo;
- prev = cmpxchg(((u64 *)addr), old, new);
-
- if ( (prev == old) )
- shadow2_validate_guest_pt_write(v, mfn, addr, 8);
- else
- rv = X86EMUL_CMPXCHG_FAILED;
-
- /* If we are writing zeros to this page, might want to unshadow */
- if ( *(u8 *)addr == 0 )
- check_for_early_unshadow(v, mfn);
-
- sh2_unmap_domain_page(addr);
- shadow2_audit_tables(v);
- check_for_early_unshadow(v, mfn);
- return rv;
-}
-
-
-/**************************************************************************/
-/* Audit tools */
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES
-
-#define AUDIT_FAIL(_level, _fmt, _a...) do { \
- printk("Shadow2 %u-on-%u audit failed at level %i, index %i\n" \
- "gl" #_level "mfn = %" SH2_PRI_mfn \
- " sl" #_level "mfn = %" SH2_PRI_mfn \
- " &gl" #_level "e = %p &sl" #_level "e = %p" \
- " gl" #_level "e = %" SH2_PRI_gpte \
- " sl" #_level "e = %" SH2_PRI_pte "\nError: " _fmt "\n", \
- GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, \
- _level, guest_index(gl ## _level ## e), \
- mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn), \
- gl ## _level ## e, sl ## _level ## e, \
- gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
- ##_a); \
- BUG(); \
- done = 1; \
-} while (0)
-
-
-static char * sh2_audit_flags(struct vcpu *v, int level,
- int gflags, int sflags)
-/* Common code for auditing flag bits */
-{
- if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
- return "shadow is present but guest is not present";
- if ( (sflags & _PAGE_GLOBAL) && !hvm_guest(v) )
- return "global bit set in PV shadow";
- if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE)))
- && ((sflags & _PAGE_DIRTY) && !(gflags & _PAGE_DIRTY)) )
- return "dirty bit not propagated";
- if ( level == 2 && (sflags & _PAGE_PSE) )
- return "PS bit set in shadow";
-#if SHADOW_PAGING_LEVELS == 3
- if ( level == 3 ) return NULL; /* All the other bits are blank in PAEl3 */
-#endif
- if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) )
- return "user/supervisor bit does not match";
- if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) )
- return "NX bit does not match";
- if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) )
- return "shadow grants write access but guest does not";
- if ( (sflags & _PAGE_ACCESSED) && !(gflags & _PAGE_ACCESSED) )
- return "accessed bit not propagated";
- return NULL;
-}
-
-static inline mfn_t
-audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
-/* Convert this gfn to an mfn in the manner appropriate for the
- * guest pagetable it's used in (gmfn) */
-{
- if ( !shadow2_mode_translate(v->domain) )
- return _mfn(gfn_x(gfn));
-
- if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
- != PGT_writable_page )
- return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
- else
- return sh2_gfn_to_mfn(v->domain, gfn_x(gfn));
-}
-
-
-int sh2_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
-{
- guest_l1e_t *gl1e, *gp;
- shadow_l1e_t *sl1e;
- mfn_t mfn, gmfn, gl1mfn;
- gfn_t gfn;
- char *s;
- int done = 0;
-
- /* Follow the backpointer */
- gl1mfn = _mfn(mfn_to_page(sl1mfn)->u.inuse.type_info);
- gl1e = gp = sh2_map_domain_page(gl1mfn);
- SHADOW2_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
-
- s = sh2_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
- shadow_l1e_get_flags(*sl1e));
- if ( s ) AUDIT_FAIL(1, "%s", s);
-
- if ( SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_MFNS )
- {
- gfn = guest_l1e_get_gfn(*gl1e);
- mfn = shadow_l1e_get_mfn(*sl1e);
- gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
- if ( mfn_x(gmfn) != mfn_x(mfn) )
- AUDIT_FAIL(1, "bad translation: gfn %" SH2_PRI_gfn
- " --> %" SH2_PRI_mfn " != mfn %" SH2_PRI_mfn "\n",
- gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
- }
- });
- sh2_unmap_domain_page(gp);
- return done;
-}
-
-int sh2_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
-{
- guest_l1e_t *gl1e, e;
- shadow_l1e_t *sl1e;
- mfn_t gl1mfn = _mfn(INVALID_MFN);
- int f;
- int done = 0;
-
- /* fl1 has no useful backpointer: all we can check are flags */
- e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */
- SHADOW2_FOREACH_L1E(sl1mfn, sl1e, 0, done, {
- f = shadow_l1e_get_flags(*sl1e);
- f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2);
- if ( !(f == 0
- || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
- _PAGE_ACCESSED|_PAGE_DIRTY)
- || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)) )
- AUDIT_FAIL(1, "fl1e has bad flags");
- });
- return 0;
-}
-
-int sh2_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
-{
- guest_l2e_t *gl2e, *gp;
- shadow_l2e_t *sl2e;
- mfn_t mfn, gmfn, gl2mfn;
- gfn_t gfn;
- char *s;
- int done = 0;
-#if GUEST_PAGING_LEVELS != 4
- int xen_mappings = !shadow2_mode_external(v->domain);
-#endif
-
- /* Follow the backpointer */
- gl2mfn = _mfn(mfn_to_page(sl2mfn)->u.inuse.type_info);
- gl2e = gp = sh2_map_domain_page(gl2mfn);
- SHADOW2_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, xen_mappings, {
-
- s = sh2_audit_flags(v, 2, guest_l2e_get_flags(*gl2e),
- shadow_l2e_get_flags(*sl2e));
- if ( s ) AUDIT_FAIL(2, "%s", s);
-
- if ( SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_MFNS )
- {
- gfn = guest_l2e_get_gfn(*gl2e);
- mfn = shadow_l2e_get_mfn(*sl2e);
- gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
- ? get_fl1_shadow_status(v, gfn)
- : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn),
- PGC_SH2_l1_shadow);
- if ( mfn_x(gmfn) != mfn_x(mfn) )
- AUDIT_FAIL(2, "bad translation: gfn %" SH2_PRI_gfn
- " (--> %" SH2_PRI_mfn ")"
- " --> %" SH2_PRI_mfn " != mfn %" SH2_PRI_mfn "\n",
- gfn_x(gfn),
- (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
- : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
- mfn_x(gmfn), mfn_x(mfn));
- }
- });
- sh2_unmap_domain_page(gp);
- return 0;
-}
-
-#if GUEST_PAGING_LEVELS >= 3
-int sh2_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
-{
- guest_l3e_t *gl3e, *gp;
- shadow_l3e_t *sl3e;
- mfn_t mfn, gmfn, gl3mfn;
- gfn_t gfn;
- char *s;
- int done = 0;
-
- /* Follow the backpointer */
- gl3mfn = _mfn(mfn_to_page(sl3mfn)->u.inuse.type_info);
- gl3e = gp = sh2_map_domain_page(gl3mfn);
- SHADOW2_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
-
- s = sh2_audit_flags(v, 3, guest_l3e_get_flags(*gl3e),
- shadow_l3e_get_flags(*sl3e));
- if ( s ) AUDIT_FAIL(3, "%s", s);
-
- if ( SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_MFNS )
- {
- gfn = guest_l3e_get_gfn(*gl3e);
- mfn = shadow_l3e_get_mfn(*sl3e);
- gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn),
- (GUEST_PAGING_LEVELS == 3
- && !shadow2_mode_external(v->domain)
- && (guest_index(gl3e) % 4) == 3)
- ? PGC_SH2_l2h_pae_shadow
- : PGC_SH2_l2_shadow);
- if ( mfn_x(gmfn) != mfn_x(mfn) )
- AUDIT_FAIL(3, "bad translation: gfn %" SH2_PRI_gfn
- " --> %" SH2_PRI_mfn " != mfn %" SH2_PRI_mfn "\n",
- gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
- }
- });
- sh2_unmap_domain_page(gp);
- return 0;
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-#if GUEST_PAGING_LEVELS >= 4
-int sh2_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
-{
- guest_l4e_t *gl4e, *gp;
- shadow_l4e_t *sl4e;
- mfn_t mfn, gmfn, gl4mfn;
- gfn_t gfn;
- char *s;
- int done = 0;
- int xen_mappings = !shadow2_mode_external(v->domain);
-
- /* Follow the backpointer */
- gl4mfn = _mfn(mfn_to_page(sl4mfn)->u.inuse.type_info);
- gl4e = gp = sh2_map_domain_page(gl4mfn);
- SHADOW2_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, xen_mappings,
- {
- s = sh2_audit_flags(v, 4, guest_l4e_get_flags(*gl4e),
- shadow_l4e_get_flags(*sl4e));
- if ( s ) AUDIT_FAIL(4, "%s", s);
-
- if ( SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_MFNS )
- {
- gfn = guest_l4e_get_gfn(*gl4e);
- mfn = shadow_l4e_get_mfn(*sl4e);
- gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn),
- PGC_SH2_l3_shadow);
- if ( mfn_x(gmfn) != mfn_x(mfn) )
- AUDIT_FAIL(4, "bad translation: gfn %" SH2_PRI_gfn
- " --> %" SH2_PRI_mfn " != mfn %" SH2_PRI_mfn "\n",
- gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
- }
- });
- sh2_unmap_domain_page(gp);
- return 0;
-}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-
-#undef AUDIT_FAIL
-
-#endif /* Audit code */
-
-/**************************************************************************/
-/* Entry points into this mode of the shadow code.
- * This will all be mangled by the preprocessor to uniquify everything. */
-struct shadow2_paging_mode sh2_paging_mode = {
- .page_fault = sh2_page_fault,
- .invlpg = sh2_invlpg,
- .gva_to_gpa = sh2_gva_to_gpa,
- .gva_to_gfn = sh2_gva_to_gfn,
- .update_cr3 = sh2_update_cr3,
- .map_and_validate_gl1e = sh2_map_and_validate_gl1e,
- .map_and_validate_gl2e = sh2_map_and_validate_gl2e,
- .map_and_validate_gl2he = sh2_map_and_validate_gl2he,
- .map_and_validate_gl3e = sh2_map_and_validate_gl3e,
- .map_and_validate_gl4e = sh2_map_and_validate_gl4e,
- .detach_old_tables = sh2_detach_old_tables,
- .x86_emulate_write = sh2_x86_emulate_write,
- .x86_emulate_cmpxchg = sh2_x86_emulate_cmpxchg,
- .x86_emulate_cmpxchg8b = sh2_x86_emulate_cmpxchg8b,
- .make_monitor_table = sh2_make_monitor_table,
- .destroy_monitor_table = sh2_destroy_monitor_table,
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_WRITABLE_HEURISTIC
- .guess_wrmap = sh2_guess_wrmap,
-#endif
- .guest_levels = GUEST_PAGING_LEVELS,
- .shadow_levels = SHADOW_PAGING_LEVELS,
-};
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
{
- if ( shadow2_mode_external(d) && guest_mode(regs) )
- return shadow2_fault(addr, regs);
+ if ( shadow_mode_external(d) && guest_mode(regs) )
+ return shadow_fault(addr, regs);
if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
return handle_gdt_ldt_mapping_fault(
addr - GDT_LDT_VIRT_START, regs);
ptwr_do_page_fault(d, addr, regs) )
return EXCRET_fault_fixed;
- if ( shadow2_mode_enabled(d) )
- return shadow2_fault(addr, regs);
+ if ( shadow_mode_enabled(d) )
+ return shadow_fault(addr, regs);
return 0;
}
struct shadow_domain {
u32 mode; /* flags to control shadow operation */
- spinlock_t lock; /* shadow2 domain lock */
+ spinlock_t lock; /* shadow domain lock */
int locker; /* processor which holds the lock */
const char *locker_function; /* Func that took it */
- struct list_head freelists[SHADOW2_MAX_ORDER + 1];
+ struct list_head freelists[SHADOW_MAX_ORDER + 1];
struct list_head p2m_freelist;
struct list_head p2m_inuse;
struct list_head toplevel_shadows;
unsigned int free_pages; /* number of pages on freelists */
unsigned int p2m_pages; /* number of pages in p2m map */
- /* Shadow2 hashtable */
- struct shadow2_hash_entry *hash_table;
- struct shadow2_hash_entry *hash_freelist;
- struct shadow2_hash_entry *hash_allocations;
+ /* Shadow hashtable */
+ struct shadow_hash_entry *hash_table;
+ struct shadow_hash_entry *hash_freelist;
+ struct shadow_hash_entry *hash_allocations;
int hash_walking; /* Some function is walking the hash table */
/* Shadow log-dirty bitmap */
/* Shadow-translated guest: Pseudophys base address of reserved area. */
unsigned long first_reserved_pfn;
- struct shadow_domain shadow2;
+ struct shadow_domain shadow;
/* Shadow translated domain: P2M mapping */
pagetable_t phys_table;
struct shadow_vcpu {
/* Pointers to mode-specific entry points. */
- struct shadow2_paging_mode *mode;
+ struct shadow_paging_mode *mode;
/* Last MFN that we emulated a write to. */
unsigned long last_emulated_mfn;
/* HVM guest: paging enabled (CR0.PG)? */
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
- struct shadow_vcpu shadow2;
+ struct shadow_vcpu shadow;
} __cacheline_aligned;
/* shorthands to improve code legibility */
/* Each frame can be threaded onto a doubly-linked list. */
union {
struct list_head list;
- /* Shadow2 uses this field as an up-pointer in lower-level shadows */
+ /* Shadow uses this field as an up-pointer in lower-level shadows */
paddr_t up;
};
/* Only used on guest pages with a shadow.
* Guest pages with a shadow must have a non-zero type count, so this
* does not conflict with the tlbflush timestamp. */
- u32 shadow2_flags;
+ u32 shadow_flags;
// XXX -- we expect to add another field here, to be used for min/max
// purposes, which is only used for shadow pages.
#define PGT_ldt_page (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
-#ifndef SHADOW2
+#ifndef SHADOW
#define PGT_l1_shadow PGT_l1_page_table
#define PGT_l2_shadow PGT_l2_page_table
#define PGT_l3_shadow PGT_l3_page_table
/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)
-#ifndef SHADOW2
+#ifndef SHADOW
#ifdef __x86_64__
#define PGT_high_mfn_shift 52
#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift)
#define PGT_score_shift 23
#define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift)
#endif
-#endif /* SHADOW2 */
+#endif /* SHADOW */
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
/* 29-bit count of references to this frame. */
#define PGC_count_mask ((1U<<29)-1)
-/* shadow2 uses the count_info on shadow pages somewhat differently */
-/* NB: please coordinate any changes here with the SH2F's in shadow2.h */
-#define PGC_SH2_none (0U<<28) /* on the shadow2 free list */
-#define PGC_SH2_min_shadow (1U<<28)
-#define PGC_SH2_l1_32_shadow (1U<<28) /* shadowing a 32-bit L1 guest page */
-#define PGC_SH2_fl1_32_shadow (2U<<28) /* L1 shadow for a 32b 4M superpage */
-#define PGC_SH2_l2_32_shadow (3U<<28) /* shadowing a 32-bit L2 guest page */
-#define PGC_SH2_l1_pae_shadow (4U<<28) /* shadowing a pae L1 page */
-#define PGC_SH2_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
-#define PGC_SH2_l2_pae_shadow (6U<<28) /* shadowing a pae L2-low page */
-#define PGC_SH2_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH2_l3_pae_shadow (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH2_l1_64_shadow (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH2_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH2_l2_64_shadow (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH2_l3_64_shadow (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH2_l4_64_shadow (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH2_max_shadow (13U<<28)
-#define PGC_SH2_p2m_table (14U<<28) /* in use as the p2m table */
-#define PGC_SH2_monitor_table (15U<<28) /* in use as a monitor table */
-#define PGC_SH2_unused (15U<<28)
-
-#define PGC_SH2_type_mask (15U<<28)
-#define PGC_SH2_type_shift 28
-
-#define PGC_SH2_pinned (1U<<27)
-
-#define _PGC_SH2_log_dirty 26
-#define PGC_SH2_log_dirty (1U<<26)
+/* shadow uses the count_info on shadow pages somewhat differently */
+/* NB: please coordinate any changes here with the SHF's in shadow.h */
+#define PGC_SH_none (0U<<28) /* on the shadow free list */
+#define PGC_SH_min_shadow (1U<<28)
+#define PGC_SH_l1_32_shadow (1U<<28) /* shadowing a 32-bit L1 guest page */
+#define PGC_SH_fl1_32_shadow (2U<<28) /* L1 shadow for a 32b 4M superpage */
+#define PGC_SH_l2_32_shadow (3U<<28) /* shadowing a 32-bit L2 guest page */
+#define PGC_SH_l1_pae_shadow (4U<<28) /* shadowing a pae L1 page */
+#define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
+#define PGC_SH_l2_pae_shadow (6U<<28) /* shadowing a pae L2-low page */
+#define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
+#define PGC_SH_l3_pae_shadow (8U<<28) /* shadowing a pae L3 page */
+#define PGC_SH_l1_64_shadow (9U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow (11U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow (12U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow (13U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow (13U<<28)
+#define PGC_SH_p2m_table (14U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_unused (15U<<28)
+
+#define PGC_SH_type_mask (15U<<28)
+#define PGC_SH_type_shift 28
+
+#define PGC_SH_pinned (1U<<27)
+
+#define _PGC_SH_log_dirty 26
+#define PGC_SH_log_dirty (1U<<26)
/* 26 bit ref count for shadow pages */
-#define PGC_SH2_count_mask ((1U<<26) - 1)
+#define PGC_SH_count_mask ((1U<<26) - 1)
/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page) (1)
/* The order of the largest allocation unit we use for shadow pages */
#if CONFIG_PAGING_LEVELS == 2
-#define SHADOW2_MAX_ORDER 0 /* Only ever need 4k allocations */
+#define SHADOW_MAX_ORDER 0 /* Only ever need 4k allocations */
#else
-#define SHADOW2_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
#endif
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
extern int shadow_remove_all_write_access(
struct domain *d, unsigned long gmfn, unsigned long mfn);
extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
-extern int _shadow2_mode_refcounts(struct domain *d);
+extern int _shadow_mode_refcounts(struct domain *d);
static inline void put_page(struct page_info *page)
{
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
- if ( !_shadow2_mode_refcounts(domain) )
+ if ( !_shadow_mode_refcounts(domain) )
DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
PRtype_info "\n",
page_to_mfn(page), domain, unpickle_domptr(d),
#define mfn_to_gmfn(_d, mfn) \
- ( (shadow2_mode_translate(_d)) \
+ ( (shadow_mode_translate(_d)) \
? get_gpfn_from_mfn(mfn) \
: (mfn) )
-#define gmfn_to_mfn(_d, gpfn) mfn_x(sh2_gfn_to_mfn(_d, gpfn))
+#define gmfn_to_mfn(_d, gpfn) mfn_x(sh_gfn_to_mfn(_d, gpfn))
/*
+++ /dev/null
-
-#ifndef __X86_PAGE_GUEST_H__
-#define __X86_PAGE_GUEST_H__
-
-#ifndef __ASSEMBLY__
-# include <asm/types.h>
-#endif
-
-#define PAGETABLE_ORDER_32 10
-#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
-#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
-#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32
-
-
-#define L1_PAGETABLE_SHIFT_32 12
-#define L2_PAGETABLE_SHIFT_32 22
-
-/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
-
-#ifndef __ASSEMBLY__
-
-typedef u32 intpte_32_t;
-
-typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
-typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
-typedef l2_pgentry_t root_pgentry_32_t;
-#endif
-
-#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
-#define put_pte_flags_32(x) ((intpte_32_t)(x))
-
-/* Get pte access flags (unsigned int). */
-#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1))
-#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2))
-
-#define l1e_get_paddr_32(x) \
- ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
-#define l2e_get_paddr_32(x) \
- ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
-
-/* Construct an empty pte. */
-#define l1e_empty_32() ((l1_pgentry_32_t) { 0 })
-#define l2e_empty_32() ((l2_pgentry_32_t) { 0 })
-
-/* Construct a pte from a pfn and access flags. */
-#define l1e_from_pfn_32(pfn, flags) \
- ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
-#define l2e_from_pfn_32(pfn, flags) \
- ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
-
-/* Construct a pte from a physical address and access flags. */
-#ifndef __ASSEMBLY__
-static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
-{
- ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
- return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
-}
-static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
-{
- ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
- return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
-}
-#endif /* !__ASSEMBLY__ */
-
-
-/* Construct a pte from a page pointer and access flags. */
-#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
-#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
-
-/* Add extra flags to an existing pte. */
-#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags))
-#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags))
-
-/* Remove flags from an existing pte. */
-#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
-#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
-
-/* Check if a pte's page mapping or significant access flags have changed. */
-#define l1e_has_changed_32(x,y,flags) \
- ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
-#define l2e_has_changed_32(x,y,flags) \
- ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset_32(a) \
- (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
-#define l2_table_offset_32(a) \
- (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
-
-#define linear_l1_table_32 \
- ((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START))
-
-#define linear_pg_table_32 linear_l1_table_32
-
-#endif /* __X86_PAGE_GUEST_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
PERFCOUNTER_CPU(exception_fixed, "pre-exception fixed")
-/* Shadow2 counters */
-PERFCOUNTER_CPU(shadow2_alloc, "calls to shadow2_alloc")
-PERFCOUNTER_CPU(shadow2_alloc_tlbflush, "shadow2_alloc flushed TLBs")
+/* Shadow counters */
+PERFCOUNTER_CPU(shadow_alloc, "calls to shadow_alloc")
+PERFCOUNTER_CPU(shadow_alloc_tlbflush, "shadow_alloc flushed TLBs")
/* STATUS counters do not reset when 'P' is hit */
-PERFSTATUS(shadow2_alloc_count, "number of shadow pages in use")
-PERFCOUNTER_CPU(shadow2_free, "calls to shadow2_free")
-PERFCOUNTER_CPU(shadow2_prealloc_1, "shadow2 recycles old shadows")
-PERFCOUNTER_CPU(shadow2_prealloc_2, "shadow2 recycles in-use shadows")
-PERFCOUNTER_CPU(shadow2_linear_map_failed, "shadow2 hit read-only linear map")
-PERFCOUNTER_CPU(shadow2_a_update, "shadow2 A bit update")
-PERFCOUNTER_CPU(shadow2_ad_update, "shadow2 A&D bit update")
-PERFCOUNTER_CPU(shadow2_fault, "calls to shadow2_fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_bad_gfn, "shadow2_fault guest bad gfn")
-PERFCOUNTER_CPU(shadow2_fault_bail_not_present,
- "shadow2_fault guest not-present")
-PERFCOUNTER_CPU(shadow2_fault_bail_nx, "shadow2_fault guest NX fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_ro_mapping, "shadow2_fault guest R/W fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_user_supervisor,
- "shadow2_fault guest U/S fault")
-PERFCOUNTER_CPU(shadow2_fault_emulate_read, "shadow2_fault emulates a read")
-PERFCOUNTER_CPU(shadow2_fault_emulate_write, "shadow2_fault emulates a write")
-PERFCOUNTER_CPU(shadow2_fault_emulate_failed, "shadow2_fault emulator fails")
-PERFCOUNTER_CPU(shadow2_fault_mmio, "shadow2_fault handled as mmio")
-PERFCOUNTER_CPU(shadow2_fault_fixed, "shadow2_fault fixed fault")
-PERFCOUNTER_CPU(shadow2_ptwr_emulate, "shadow2 causes ptwr to emulate")
-PERFCOUNTER_CPU(shadow2_validate_gl1e_calls, "calls to shadow2_validate_gl1e")
-PERFCOUNTER_CPU(shadow2_validate_gl2e_calls, "calls to shadow2_validate_gl2e")
-PERFCOUNTER_CPU(shadow2_validate_gl3e_calls, "calls to shadow2_validate_gl3e")
-PERFCOUNTER_CPU(shadow2_validate_gl4e_calls, "calls to shadow2_validate_gl4e")
-PERFCOUNTER_CPU(shadow2_hash_lookups, "calls to shadow2_hash_lookup")
-PERFCOUNTER_CPU(shadow2_hash_lookup_head, "shadow2 hash hit in bucket head")
-PERFCOUNTER_CPU(shadow2_hash_lookup_miss, "shadow2 hash misses")
-PERFCOUNTER_CPU(shadow2_get_shadow_status, "calls to get_shadow_status")
-PERFCOUNTER_CPU(shadow2_hash_inserts, "calls to shadow2_hash_insert")
-PERFCOUNTER_CPU(shadow2_hash_deletes, "calls to shadow2_hash_delete")
-PERFCOUNTER_CPU(shadow2_writeable, "shadow2 removes write access")
-PERFCOUNTER_CPU(shadow2_writeable_h_1, "shadow2 writeable: 32b w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_2, "shadow2 writeable: 32pae w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_3, "shadow2 writeable: 64b w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_4, "shadow2 writeable: 32b linux low")
-PERFCOUNTER_CPU(shadow2_writeable_bf, "shadow2 writeable brute-force")
-PERFCOUNTER_CPU(shadow2_mappings, "shadow2 removes all mappings")
-PERFCOUNTER_CPU(shadow2_mappings_bf, "shadow2 rm-mappings brute-force")
-PERFCOUNTER_CPU(shadow2_early_unshadow, "shadow2 unshadows for fork/exit")
-PERFCOUNTER_CPU(shadow2_early_unshadow_top, "shadow2 unhooks for fork/exit")
-PERFCOUNTER_CPU(shadow2_unshadow, "shadow2 unshadows a page")
-PERFCOUNTER_CPU(shadow2_up_pointer, "shadow2 unshadow by up-pointer")
-PERFCOUNTER_CPU(shadow2_unshadow_bf, "shadow2 unshadow brute-force")
-PERFCOUNTER_CPU(shadow2_get_page_fail, "shadow2_get_page_from_l1e failed")
-PERFCOUNTER_CPU(shadow2_guest_walk, "shadow2 walks guest tables")
-PERFCOUNTER_CPU(shadow2_walk_cache_hit, "shadow2 walk-cache hits")
-PERFCOUNTER_CPU(shadow2_walk_cache_miss, "shadow2 walk-cache misses")
+PERFSTATUS(shadow_alloc_count, "number of shadow pages in use")
+PERFCOUNTER_CPU(shadow_free, "calls to shadow_free")
+PERFCOUNTER_CPU(shadow_prealloc_1, "shadow recycles old shadows")
+PERFCOUNTER_CPU(shadow_prealloc_2, "shadow recycles in-use shadows")
+PERFCOUNTER_CPU(shadow_linear_map_failed, "shadow hit read-only linear map")
+PERFCOUNTER_CPU(shadow_a_update, "shadow A bit update")
+PERFCOUNTER_CPU(shadow_ad_update, "shadow A&D bit update")
+PERFCOUNTER_CPU(shadow_fault, "calls to shadow_fault")
+PERFCOUNTER_CPU(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
+PERFCOUNTER_CPU(shadow_fault_bail_not_present,
+ "shadow_fault guest not-present")
+PERFCOUNTER_CPU(shadow_fault_bail_nx, "shadow_fault guest NX fault")
+PERFCOUNTER_CPU(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
+PERFCOUNTER_CPU(shadow_fault_bail_user_supervisor,
+ "shadow_fault guest U/S fault")
+PERFCOUNTER_CPU(shadow_fault_emulate_read, "shadow_fault emulates a read")
+PERFCOUNTER_CPU(shadow_fault_emulate_write, "shadow_fault emulates a write")
+PERFCOUNTER_CPU(shadow_fault_emulate_failed, "shadow_fault emulator fails")
+PERFCOUNTER_CPU(shadow_fault_mmio, "shadow_fault handled as mmio")
+PERFCOUNTER_CPU(shadow_fault_fixed, "shadow_fault fixed fault")
+PERFCOUNTER_CPU(shadow_ptwr_emulate, "shadow causes ptwr to emulate")
+PERFCOUNTER_CPU(shadow_validate_gl1e_calls, "calls to shadow_validate_gl1e")
+PERFCOUNTER_CPU(shadow_validate_gl2e_calls, "calls to shadow_validate_gl2e")
+PERFCOUNTER_CPU(shadow_validate_gl3e_calls, "calls to shadow_validate_gl3e")
+PERFCOUNTER_CPU(shadow_validate_gl4e_calls, "calls to shadow_validate_gl4e")
+PERFCOUNTER_CPU(shadow_hash_lookups, "calls to shadow_hash_lookup")
+PERFCOUNTER_CPU(shadow_hash_lookup_head, "shadow hash hit in bucket head")
+PERFCOUNTER_CPU(shadow_hash_lookup_miss, "shadow hash misses")
+PERFCOUNTER_CPU(shadow_get_shadow_status, "calls to get_shadow_status")
+PERFCOUNTER_CPU(shadow_hash_inserts, "calls to shadow_hash_insert")
+PERFCOUNTER_CPU(shadow_hash_deletes, "calls to shadow_hash_delete")
+PERFCOUNTER_CPU(shadow_writeable, "shadow removes write access")
+PERFCOUNTER_CPU(shadow_writeable_h_1, "shadow writeable: 32b w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_2, "shadow writeable: 32pae w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_3, "shadow writeable: 64b w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_4, "shadow writeable: 32b linux low")
+PERFCOUNTER_CPU(shadow_writeable_bf, "shadow writeable brute-force")
+PERFCOUNTER_CPU(shadow_mappings, "shadow removes all mappings")
+PERFCOUNTER_CPU(shadow_mappings_bf, "shadow rm-mappings brute-force")
+PERFCOUNTER_CPU(shadow_early_unshadow, "shadow unshadows for fork/exit")
+PERFCOUNTER_CPU(shadow_early_unshadow_top, "shadow unhooks for fork/exit")
+PERFCOUNTER_CPU(shadow_unshadow, "shadow unshadows a page")
+PERFCOUNTER_CPU(shadow_up_pointer, "shadow unshadow by up-pointer")
+PERFCOUNTER_CPU(shadow_unshadow_bf, "shadow unshadow brute-force")
+PERFCOUNTER_CPU(shadow_get_page_fail, "shadow_get_page_from_l1e failed")
+PERFCOUNTER_CPU(shadow_guest_walk, "shadow walks guest tables")
+PERFCOUNTER_CPU(shadow_walk_cache_hit, "shadow walk-cache hits")
+PERFCOUNTER_CPU(shadow_walk_cache_miss, "shadow walk-cache misses")
/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
/******************************************************************************
* include/asm-x86/shadow.h
*
- * Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#ifndef _XEN_SHADOW_H
#define _XEN_SHADOW_H
-/* This file is just a wrapper around the new Shadow2 header,
- * providing names that must be defined in any shadow implementation. */
-
-#include <asm/shadow2.h>
+#include <public/domctl.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/flushtlb.h>
/* How to make sure a page is not referred to in a shadow PT */
/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */
#define shadow_drop_references(_d, _p) \
- shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
+ shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
#define shadow_sync_and_drop_references(_d, _p) \
- shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-
-/* Whether we are translating the domain's frame numbers for it */
-#define shadow_mode_translate(d) shadow2_mode_translate(d)
+ shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-/* ...and if so, how to add and remove entries in the mapping */
+/* How to add and remove entries in the p2m mapping. */
#define guest_physmap_add_page(_d, _p, _m) \
- shadow2_guest_physmap_add_page((_d), (_p), (_m))
+ shadow_guest_physmap_add_page((_d), (_p), (_m))
#define guest_physmap_remove_page(_d, _p, _m ) \
- shadow2_guest_physmap_remove_page((_d), (_p), (_m))
+ shadow_guest_physmap_remove_page((_d), (_p), (_m))
+
+/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+
+#define SHM2_shift 10
+/* We're in one of the shadow modes */
+#define SHM2_enable (1U << SHM2_shift)
+/* Refcounts based on shadow tables instead of guest tables */
+#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift)
+/* Enable log dirty mode */
+#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift)
+/* Xen does p2m translation, not guest */
+#define SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift)
+/* Xen does not steal address space from the domain for its own booking;
+ * requires VT or similar mechanisms */
+#define SHM2_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift)
+
+#define shadow_mode_enabled(_d) ((_d)->arch.shadow.mode)
+#define shadow_mode_refcounts(_d) ((_d)->arch.shadow.mode & SHM2_refcounts)
+#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow.mode & SHM2_log_dirty)
+#define shadow_mode_translate(_d) ((_d)->arch.shadow.mode & SHM2_translate)
+#define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external)
+
+/* Xen traps & emulates all reads of all page table pages:
+ * not yet supported
+ */
+#define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; })
+
+// flags used in the return value of the shadow_set_lXe() functions...
+#define SHADOW_SET_CHANGED 0x1
+#define SHADOW_SET_FLUSH 0x2
+#define SHADOW_SET_ERROR 0x4
+#define SHADOW_SET_L3PAE_RECOPY 0x8
+
+// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
+#ifdef __x86_64__
+#define pv_32bit_guest(_v) 0 // not yet supported
+#else
+// Fix: expand the macro parameter (_v), not a stray 'v' captured from the
+// caller's scope, and parenthesize the whole expansion.
+#define pv_32bit_guest(_v) (!hvm_guest(_v))
+#endif
+
+/* The shadow lock.
+ *
+ * This lock is per-domain. It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ *
+ * Specifically, it protects:
+ * - all changes to shadow page table pages
+ * - the shadow hash table
+ * - the shadow page allocator
+ * - all changes to guest page table pages; if/when the notion of
+ *   out-of-sync pages is added to this code, then the shadow lock is
+ *   protecting all guest page table pages which are not currently
+ *   listed as both guest-writable and out-of-sync...
+ * XXX -- need to think about this relative to writable page tables.
+ * - all changes to the page_info->tlbflush_timestamp
+ * - the page_info->count fields on shadow pages
+ * - the shadow dirty bit array and count
+ * - XXX
+ */
+#ifndef CONFIG_SMP
+#error shadow.h currently requires CONFIG_SMP
+#endif
+
+#define shadow_lock_init(_d) \
+ do { \
+ spin_lock_init(&(_d)->arch.shadow.lock); \
+ (_d)->arch.shadow.locker = -1; \
+ (_d)->arch.shadow.locker_function = "nobody"; \
+ } while (0)
+
+#define shadow_lock_is_acquired(_d) \
+ (current->processor == (_d)->arch.shadow.locker)
+
+#define shadow_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
+ { \
+ printk("Error: shadow lock held by %s\n", \
+ (_d)->arch.shadow.locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.shadow.lock); \
+ ASSERT((_d)->arch.shadow.locker == -1); \
+ (_d)->arch.shadow.locker = current->processor; \
+ (_d)->arch.shadow.locker_function = __func__; \
+ } while (0)
+
+#define shadow_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.shadow.locker == current->processor); \
+ (_d)->arch.shadow.locker = -1; \
+ (_d)->arch.shadow.locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.shadow.lock); \
+ } while (0)
+
+/*
+ * Levels of self-test and paranoia
+ * XXX should go in config files somewhere?
+ */
+#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */
+#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */
+#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */
+#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */
+#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */
+#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW_AUDIT 0
+#define SHADOW_AUDIT_ENABLE 0
+#else
+#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */
+#define SHADOW_AUDIT_ENABLE shadow_audit_enable
+extern int shadow_audit_enable;
+#endif
+
+/*
+ * Levels of optimization
+ * XXX should go in config files somewhere?
+ */
+#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
+#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
+
+#define SHADOW_OPTIMIZATIONS 0x03
+
+
+/* With shadow pagetables, the different kinds of address start
+ * to get confusing.
+ *
+ * Virtual addresses are what they usually are: the addresses that are used
+ * to access memory while the guest is running. The MMU translates from
+ * virtual addresses to machine addresses.
+ *
+ * (Pseudo-)physical addresses are the abstraction of physical memory the
+ * guest uses for allocation and so forth. For the purposes of this code,
+ * we can largely ignore them.
+ *
+ * Guest frame numbers (gfns) are the entries that the guest puts in its
+ * pagetables. For normal paravirtual guests, they are actual frame numbers,
+ * with the translation done by the guest.
+ *
+ * Machine frame numbers (mfns) are the entries that the hypervisor puts
+ * in the shadow page tables.
+ *
+ * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
+ * to a "machine frame number, from the guest's perspective", or in other
+ * words, pseudo-physical frame numbers. However, in the shadow code, the
+ * term "gmfn" means "the mfn of a guest page"; this combines naturally with
+ * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
+ * guest L2 page), etc...
+ */
+
+/* With this defined, we do some ugly things to force the compiler to
+ * give us type safety between mfns and gfns and other integers.
+ * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions
+ * that translate between int and foo_t.
+ *
+ * It does have some performance cost because the types now have
+ * a different storage attribute, so may not want it on all the time. */
+#ifndef NDEBUG
+#define TYPE_SAFETY 1
+#endif
+
+#ifdef TYPE_SAFETY
+#define TYPE_SAFE(_type,_name) \
+typedef struct { _type _name; } _name##_t; \
+static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
+static inline _type _name##_x(_name##_t n) { return n._name; }
+#else
+#define TYPE_SAFE(_type,_name) \
+typedef _type _name##_t; \
+static inline _name##_t _##_name(_type n) { return n; } \
+static inline _type _name##_x(_name##_t n) { return n; }
+#endif
+
+TYPE_SAFE(unsigned long,mfn)
+#define SH_PRI_mfn "05lx"
+
+static inline int
+valid_mfn(mfn_t m)
+{
+ return VALID_MFN(mfn_x(m));
+}
+
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+ return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+ return pagetable_from_pfn(mfn_x(mfn));
+}
+
+static inline int
+shadow_vcpu_mode_translate(struct vcpu *v)
+{
+ // Returns true if this VCPU needs to be using the P2M table to translate
+ // between GFNs and MFNs.
+ //
+    // This is true of translated HVM domains on a vcpu which has paging
+    // enabled. (HVM vcpus with paging disabled are using the p2m table as
+    // their paging table, so no translation occurs in this case.)
+ //
+ return v->arch.shadow.hvm_paging_enabled;
+}
+
+
+/**************************************************************************/
+/* Mode-specific entry points into the shadow code */
+
+struct x86_emulate_ctxt;
+struct shadow_paging_mode {
+ int (*page_fault )(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs);
+ int (*invlpg )(struct vcpu *v, unsigned long va);
+ unsigned long (*gva_to_gpa )(struct vcpu *v, unsigned long va);
+ unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va);
+ void (*update_cr3 )(struct vcpu *v);
+ int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ void (*detach_old_tables )(struct vcpu *v);
+ int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
+ void *src, u32 bytes,
+ struct x86_emulate_ctxt *ctxt);
+ int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+ int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt);
+ mfn_t (*make_monitor_table )(struct vcpu *v);
+ void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+ int (*guess_wrmap )(struct vcpu *v,
+ unsigned long vaddr, mfn_t gmfn);
+#endif
+ /* For outsiders to tell what mode we're in */
+ unsigned int shadow_levels;
+ unsigned int guest_levels;
+};
+
+static inline int shadow_guest_paging_levels(struct vcpu *v)
+{
+ ASSERT(v->arch.shadow.mode != NULL);
+ return v->arch.shadow.mode->guest_levels;
+}
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Turning on shadow test mode */
+int shadow_test_enable(struct domain *d);
+
+/* Handler for shadow control ops: enabling and disabling shadow modes,
+ * and log-dirty bitmap ops all happen through here. */
+int shadow_domctl(struct domain *d,
+ xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+
+/* Call when destroying a domain */
+void shadow_teardown(struct domain *d);
+
+/* Call once all of the references to the domain have gone away */
+void shadow_final_teardown(struct domain *d);
+
+
+/* Mark a page as dirty in the bitmap */
+void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
+static inline void mark_dirty(struct domain *d, unsigned long gmfn)
+{
+ if ( shadow_mode_log_dirty(d) )
+ {
+ shadow_lock(d);
+ sh_do_mark_dirty(d, _mfn(gmfn));
+ shadow_unlock(d);
+ }
+}
+
+/* Internal version, for when the shadow lock is already held */
+static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+ ASSERT(shadow_lock_is_acquired(d));
+ if ( shadow_mode_log_dirty(d) )
+ sh_do_mark_dirty(d, gmfn);
+}
+
+static inline int
+shadow_fault(unsigned long va, struct cpu_user_regs *regs)
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults. Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+{
+ struct vcpu *v = current;
+ perfc_incrc(shadow_fault);
+ return v->arch.shadow.mode->page_fault(v, va, regs);
+}
+
+static inline int
+shadow_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg. Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+ return v->arch.shadow.mode->invlpg(v, va);
+}
+
+static inline unsigned long
+shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+ return v->arch.shadow.mode->gva_to_gpa(v, va);
+}
+
+static inline unsigned long
+shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+ return v->arch.shadow.mode->gva_to_gfn(v, va);
+}
+
+static inline void
+shadow_update_cr3(struct vcpu *v)
+/* Updates all the things that are derived from the guest's CR3.
+ * Called when the guest changes CR3. */
+{
+ shadow_lock(v->domain);
+ v->arch.shadow.mode->update_cr3(v);
+ shadow_unlock(v->domain);
+}
+
+
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ *
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Update ref counts to shadow tables appropriately.
+ * For PAE, relocate L3 entries, if necessary, into low memory.
+ */
+static inline void update_cr3(struct vcpu *v)
+{
+ unsigned long cr3_mfn=0;
+
+ if ( shadow_mode_enabled(v->domain) )
+ {
+ shadow_update_cr3(v);
+ return;
+ }
+
+#if CONFIG_PAGING_LEVELS == 4
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+ else
+#endif
+ cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+ make_cr3(v, cr3_mfn);
+}
+
+extern void sh_update_paging_modes(struct vcpu *v);
+
+/* Should be called to initialise paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+static inline void shadow_update_paging_modes(struct vcpu *v)
+{
+ ASSERT(shadow_mode_enabled(v->domain));
+ shadow_lock(v->domain);
+ sh_update_paging_modes(v);
+ shadow_unlock(v->domain);
+}
+
+static inline void
+shadow_detach_old_tables(struct vcpu *v)
+{
+ if ( v->arch.shadow.mode )
+ v->arch.shadow.mode->detach_old_tables(v);
+}
+
+static inline mfn_t
+shadow_make_monitor_table(struct vcpu *v)
+{
+ return v->arch.shadow.mode->make_monitor_table(v);
+}
+
+static inline void
+shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+ v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
+}
+
+/* Validate a pagetable change from the guest and update the shadows. */
+extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry);
+
+/* Update the shadows in response to a pagetable write from a HVM guest */
+extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs.
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+ unsigned int level,
+ unsigned long fault_addr);
+
+/* Remove all mappings of the guest mfn from the shadows.
+ * Returns non-zero if we need to flush TLBs. */
+extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+
+void
+shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
+/* This is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+
+/* Remove all shadows of the guest mfn. */
+extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
+{
+ sh_remove_shadows(v, gmfn, 1);
+}
+
+/* Add a page to a domain */
+void
+shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+ unsigned long mfn);
+
+/* Remove a page from a domain */
+void
+shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+ unsigned long mfn);
+
+/*
+ * Definitions for the shadow_flags field in page_info.
+ * These flags are stored on *guest* pages...
+ * Bits 1-13 are encodings for the shadow types.
+ */
+#define PGC_SH_type_to_index(_type) ((_type) >> PGC_SH_type_shift)
+#define SHF_page_type_mask \
+ (((1u << (PGC_SH_type_to_index(PGC_SH_max_shadow) + 1u)) - 1u) - \
+ ((1u << PGC_SH_type_to_index(PGC_SH_min_shadow)) - 1u))
+
+#define SHF_L1_32 (1u << PGC_SH_type_to_index(PGC_SH_l1_32_shadow))
+#define SHF_FL1_32 (1u << PGC_SH_type_to_index(PGC_SH_fl1_32_shadow))
+#define SHF_L2_32 (1u << PGC_SH_type_to_index(PGC_SH_l2_32_shadow))
+#define SHF_L1_PAE (1u << PGC_SH_type_to_index(PGC_SH_l1_pae_shadow))
+#define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
+#define SHF_L2_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
+#define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
+#define SHF_L3_PAE (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
+#define SHF_L1_64 (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
+#define SHF_FL1_64 (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
+#define SHF_L2_64 (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))
+#define SHF_L3_64 (1u << PGC_SH_type_to_index(PGC_SH_l3_64_shadow))
+#define SHF_L4_64 (1u << PGC_SH_type_to_index(PGC_SH_l4_64_shadow))
+
+/* Used for hysteresis when automatically unhooking mappings on fork/exit */
+#define SHF_unhooked_mappings (1u<<31)
+
+/*
+ * Allocation of shadow pages
+ */
+
+/* Return the minimum acceptable number of shadow pages a domain needs */
+unsigned int shadow_min_acceptable_pages(struct domain *d);
+
+/* Set the pool of shadow pages to the required number of MB.
+ * Input will be rounded up to at least shadow_min_acceptable_pages().
+ * Returns 0 for success, 1 for failure. */
+unsigned int shadow_set_allocation(struct domain *d,
+ unsigned int megabytes,
+ int *preempted);
+
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static inline unsigned int shadow_get_allocation(struct domain *d)
+{
+ unsigned int pg = d->arch.shadow.total_pages;
+ return ((pg >> (20 - PAGE_SHIFT))
+ + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
+
+/*
+ * Linked list for chaining entries in the shadow hash table.
+ */
+/* One entry in the shadow hash table: maps (n, t) -> smfn. */
+struct shadow_hash_entry {
+    struct shadow_hash_entry *next;
+    mfn_t smfn; /* MFN of the shadow */
+/* Fix: was "#ifdef _x86_64_", which is never defined (the predefined macro
+ * is __x86_64__), so the space-saving bitfield was silently compiled out. */
+#ifdef __x86_64__ /* Shorten 'n' so we don't waste a whole word on storing 't' */
+    unsigned long n:56; /* MFN of guest PT or GFN of guest superpage */
+#else
+    unsigned long n; /* MFN of guest PT or GFN of guest superpage */
+#endif
+    unsigned char t; /* shadow type bits, or 0 for empty */
+};
+
+#define SHADOW_HASH_BUCKETS 251
+/* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
+
+
+/* NOTE(review): SHOPT_CACHE_WALKS is not among the SHOPT_* flags defined
+ * above (only SHOPT_WRITABLE_HEURISTIC and SHOPT_EARLY_UNSHADOW exist), and
+ * an undefined identifier evaluates to 0 inside #if, so this whole section
+ * is currently compiled out -- confirm whether a SHOPT_CACHE_WALKS bit was
+ * meant to be defined in the optimization list. */
+#if SHADOW_OPTIMIZATIONS & SHOPT_CACHE_WALKS
+/* Optimization: cache the results of guest walks. This helps with MMIO
+ * and emulated writes, which tend to issue very similar walk requests
+ * repeatedly. We keep the results of the last few walks, and blow
+ * away the cache on guest cr3 write, mode change, or page fault. */
+
+#define SH_WALK_CACHE_ENTRIES 4
+
+/* Rather than cache a guest walk, which would include mapped pointers
+ * to pages, we cache what a TLB would remember about the walk: the
+ * permissions and the l1 gfn */
+struct shadow_walk_cache {
+    unsigned long va; /* The virtual address (or 0 == unused) */
+    unsigned long gfn; /* The gfn from the effective l1e */
+    u32 permissions; /* The aggregated permission bits */
+};
+#endif
+
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Walk another domain's P2M table, mapping pages as we go */
+extern mfn_t
+sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+
+
+/* General conversion function from gfn to mfn */
+static inline mfn_t
+sh_gfn_to_mfn(struct domain *d, unsigned long gfn)
+{
+ if ( !shadow_mode_translate(d) )
+ return _mfn(gfn);
+ else if ( likely(current->domain == d) )
+ return _mfn(get_mfn_from_gpfn(gfn));
+ else
+ return sh_gfn_to_mfn_foreign(d, gfn);
+}
+
+// vcpu-specific version of gfn_to_mfn(). This is where we hide the dirty
+// little secret that, for hvm guests with paging disabled, nearly all of the
+// shadow code actually thinks that the guest is running on *untranslated* page
+// tables (which is actually domain->phys_table).
+//
+static inline mfn_t
+sh_vcpu_gfn_to_mfn(struct vcpu *v, unsigned long gfn)
+{
+ if ( !shadow_vcpu_mode_translate(v) )
+ return _mfn(gfn);
+ if ( likely(current->domain == v->domain) )
+ return _mfn(get_mfn_from_gpfn(gfn));
+ return sh_gfn_to_mfn_foreign(v->domain, gfn);
+}
+
+static inline unsigned long
+sh_mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+ if ( shadow_mode_translate(d) )
+ return get_gpfn_from_mfn(mfn_x(mfn));
+ else
+ return mfn_x(mfn);
+}
+
+
#endif /* _XEN_SHADOW_H */
* mode: C
* c-set-style: "BSD"
* c-basic-offset: 4
- * tab-width: 4
* indent-tabs-mode: nil
* End:
*/
+
+++ /dev/null
-/******************************************************************************
- * arch/x86/shadow2-multi.h
- *
- * Shadow2 declarations which will be multiply compiled.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
- * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
- * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-extern int
-SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t gl1mfn, void *new_gl1p, u32 size);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2he, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl3e, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t gl3mfn, void *new_gl3p, u32 size);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl4e, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t gl4mfn, void *new_gl4p, u32 size);
-
-extern void
-SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t smfn);
-extern void
-SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t smfn);
-extern void
-SHADOW2_INTERNAL_NAME(sh2_destroy_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t smfn);
-extern void
-SHADOW2_INTERNAL_NAME(sh2_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
- struct vcpu *v, mfn_t smfn);
-
-extern void
-SHADOW2_INTERNAL_NAME(sh2_unpin_all_l3_subshadows, 3, 3)
- (struct vcpu *v, mfn_t smfn);
-
-extern void
-SHADOW2_INTERNAL_NAME(sh2_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl2mfn);
-extern void
-SHADOW2_INTERNAL_NAME(sh2_unhook_pae_mappings, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl3mfn);
-extern void
-SHADOW2_INTERNAL_NAME(sh2_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl4mfn);
-
-extern int
-SHADOW2_INTERNAL_NAME(sh2_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
-extern void
-SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, void *ep, mfn_t smfn);
-
-extern int
-SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_remove_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn);
-extern int
-SHADOW2_INTERNAL_NAME(sh2_remove_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn);
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES
-int
-SHADOW2_INTERNAL_NAME(sh2_audit_l1_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
-int
-SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
-int
-SHADOW2_INTERNAL_NAME(sh2_audit_l2_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl2mfn, mfn_t x);
-int
-SHADOW2_INTERNAL_NAME(sh2_audit_l3_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl3mfn, mfn_t x);
-int
-SHADOW2_INTERNAL_NAME(sh2_audit_l4_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl4mfn, mfn_t x);
-#endif
-
-#if SHADOW_LEVELS == GUEST_LEVELS
-extern mfn_t
-SHADOW2_INTERNAL_NAME(sh2_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v);
-extern void
-SHADOW2_INTERNAL_NAME(sh2_destroy_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
- (struct vcpu *v, mfn_t mmfn);
-#endif
-
-extern struct shadow2_paging_mode
-SHADOW2_INTERNAL_NAME(sh2_paging_mode, SHADOW_LEVELS, GUEST_LEVELS);
+++ /dev/null
-/******************************************************************************
- * arch/x86/shadow2-private.h
- *
- * Shadow2 code that is private, and does not need to be multiply compiled.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
- * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
- * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _XEN_SHADOW2_PRIVATE_H
-#define _XEN_SHADOW2_PRIVATE_H
-
-// In order to override the definition of mfn_to_page, we make sure page.h has
-// been included...
-#include <asm/page.h>
-#include <xen/domain_page.h>
-#include <asm/x86_emulate.h>
-#include <asm/hvm/support.h>
-
-
-/******************************************************************************
- * Definitions for the use of the "available" bits in the shadow PTEs.
- *
- * Review of the low 12 bits of a shadow page table entry:
- *
- * in a guest: in a shadow:
- * Bit 11: _PAGE_AVAIL2, aka _PAGE_GNTTAB
- * Bit 10: _PAGE_AVAIL1 _PAGE_SHADOW_RW ("SW" below)
- * Bit 9: _PAGE_AVAIL0 _PAGE_SHADOW_PRESENT ("SP" below)
- * Bit 8: _PAGE_GLOBAL _PAGE_SHADOW_MMIO ("MMIO" below),
- * aka _PAGE_SHADOW_GUEST_NOT_PRESENT
- * Bit 7: _PAGE_PSE, aka _PAGE_PAT
- * Bit 6: _PAGE_DIRTY
- * Bit 5: _PAGE_ACCESSED
- * Bit 4: _PAGE_PCD
- * Bit 3: _PAGE_PWT
- * Bit 2: _PAGE_USER
- * Bit 1: _PAGE_RW ("GW" below)
- * Bit 0: _PAGE_PRESENT ("GP" below)
- *
- * Given a guest entry, as shown below, we can expect the following in the
- * corresponding shadow entry:
- *
- * Guest entry Shadow entry Commentary
- * ----------- ---------------- ---------------------------------------------
- * Maps
- * GP GW IO GP SP GW SW MMIO
- * -- -- ---- -- -- -- -- ----
- * - - - 0 0 0 0 0 The guest entry has not yet been shadowed.
- * 0 - - 0 0 0 0 1 The guest entry is marked not-present.
- * 1 1 no ? 1 ? 1 0 Writable entry in the guest.
- * 1 0 no ? 1 0 0 0 Read-only entry in the guest.
- * 1 1 yes 0 1 ? 1 1 Writable MMIO mapping in the guest.
- * 1 0 yes 0 1 0 0 1 Read-only MMIO mapping in the guest.
- *
- * Normally, we would expect that GP=1 in the guest to imply GP=1 in the
- * shadow, and similarly for GW=1. However, various functionality that may be
- * implemented via the shadow can cause GP or GW to be cleared in such cases.
- * A & D bit emulation is a prime example of such functionality.
- *
- * If _PAGE_SHADOW_PRESENT is zero, then the _PAGE_PRESENT bit in that same
- * entry will always be zero, too.
-
- * Bit 11 is used in debug builds as the _PAGE_GNTTAB bit in PV guests. It is
- * currently available for random (ab)use in shadow entries.
- *
- * Bit 8 (the global bit) could be propagated from an HVM guest to the shadow,
- * but currently there is no benefit, as the guest's TLB is flushed on every
- * transition of CR3 anyway due to the HVM exit/re-entry.
- *
- * In shadow entries in which the _PAGE_SHADOW_PRESENT is set, bit 8 is used
- * as the _PAGE_SHADOW_MMIO bit. In such entries, if _PAGE_SHADOW_MMIO is
- * set, then the entry contains the *gfn* directly from the corresponding
- * guest entry (not an mfn!!).
- *
- * Bit 7 is set in a guest L2 to signify a superpage entry. The current
- * shadow code splinters superpage mappings into 512 or 1024 4K mappings; the
- * resulting shadow L1 table is called an FL1. Note that there is no guest
- * page that corresponds to an FL1.
- *
- * Bit 7 in a guest L1 is the PAT2 bit. Currently we do not support PAT in
- * this shadow code.
- *
- * Bit 6 is the dirty bit.
- *
- * Bit 5 is the accessed bit.
- *
- * Bit 4 is the cache disable bit. If set in a guest, the hardware is
- * supposed to refuse to cache anything found via this entry. It can be set
- * in an L4e, L3e, L2e, or L1e. This shadow code currently does not support
- * cache disable bits. They are silently ignored.
- *
- * Bit 4 is a guest L1 is also the PAT1 bit. Currently we do not support PAT
- * in this shadow code.
- *
- * Bit 3 is the cache write-thru bit. If set in a guest, the hardware is
- * supposed to use write-thru instead of write-back caching for anything found
- * via this entry. It can be set in an L4e, L3e, L2e, or L1e. This shadow
- * code currently does not support cache write-thru bits. They are silently
- * ignored.
- *
- * Bit 3 is a guest L1 is also the PAT0 bit. Currently we do not support PAT
- * in this shadow code.
- *
- * Bit 2 is the user bit.
- *
- * Bit 1 is the read-write bit.
- *
- * Bit 0 is the present bit.
- */
-
-// Copy of the _PAGE_RW bit from the guest's PTE, appropriately zero'ed by
-// the appropriate shadow rules.
-#define _PAGE_SHADOW_RW _PAGE_AVAIL1
-
-// Copy of the _PAGE_PRESENT bit from the guest's PTE
-#define _PAGE_SHADOW_PRESENT _PAGE_AVAIL0
-
-// The matching guest entry maps MMIO space
-#define _PAGE_SHADOW_MMIO _PAGE_GLOBAL
-
-// Shadow flags value used when the guest is not present
-#define _PAGE_SHADOW_GUEST_NOT_PRESENT _PAGE_GLOBAL
-
-
-/******************************************************************************
- * Debug and error-message output
- */
-#define SHADOW2_PRINTK(_f, _a...) \
- debugtrace_printk("sh2: %s(): " _f, __func__, ##_a)
-#define SHADOW2_ERROR(_f, _a...) \
- printk("sh2 error: %s(): " _f, __func__, ##_a)
-#define SHADOW2_DEBUG(flag, _f, _a...) \
- do { \
- if (SHADOW2_DEBUG_ ## flag) \
- debugtrace_printk("sh2debug: %s(): " _f, __func__, ##_a); \
- } while (0)
-
-// The flags for use with SHADOW2_DEBUG:
-#define SHADOW2_DEBUG_PROPAGATE 0
-#define SHADOW2_DEBUG_MAKE_SHADOW 0
-#define SHADOW2_DEBUG_DESTROY_SHADOW 0
-#define SHADOW2_DEBUG_P2M 0
-#define SHADOW2_DEBUG_A_AND_D 0
-#define SHADOW2_DEBUG_EMULATE 0
-#define SHADOW2_DEBUG_LOGDIRTY 1
-
-
-/******************************************************************************
- * Auditing routines
- */
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_FULL
-extern void shadow2_audit_tables(struct vcpu *v);
-#else
-#define shadow2_audit_tables(_v) do {} while(0)
-#endif
-
-#if SHADOW2_AUDIT & SHADOW2_AUDIT_P2M
-extern void shadow2_audit_p2m(struct domain *d);
-#else
-#define shadow2_audit_p2m(_d) do {} while(0)
-#endif
-
-
-/******************************************************************************
- * Mechanism for double-checking the optimized pagefault path: this
- * structure contains a record of actions taken by the fault handling
- * code. In paranoid mode, the fast-path code fills out one of these
- * structures (but doesn't take any actual action) and then the normal
- * path fills in another. When the fault handler finishes, the
- * two are compared */
-
-#ifdef SHADOW2_OPTIMIZATION_PARANOIA
-
-typedef struct shadow2_action_log sh2_log_t;
-struct shadow2_action_log {
- paddr_t ad[CONFIG_PAGING_LEVELS]; /* A & D bits propagated here */
- paddr_t mmio; /* Address of an mmio operation */
- int rv; /* Result of the fault handler */
-};
-
-/* There are two logs, one for the fast path, one for the normal path */
-enum sh2_log_type { log_slow = 0, log_fast= 1 };
-
-/* Alloc and zero the logs */
-static inline void sh2_init_log(struct vcpu *v)
-{
- if ( unlikely(!v->arch.shadow2.action_log) )
- v->arch.shadow2.action_log = xmalloc_array(sh2_log_t, 2);
- ASSERT(v->arch.shadow2.action_log);
- memset(v->arch.shadow2.action_log, 0, 2 * sizeof (sh2_log_t));
-}
-
-/* Log an A&D-bit update */
-static inline void sh2_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
-{
- v->arch.shadow2.action_log[v->arch.shadow2.action_index].ad[level] = e;
-}
-
-/* Log an MMIO address */
-static inline void sh2_log_mmio(struct vcpu *v, paddr_t m)
-{
- v->arch.shadow2.action_log[v->arch.shadow2.action_index].mmio = m;
-}
-
-/* Log the result */
-static inline void sh2_log_rv(struct vcpu *v, int rv)
-{
- v->arch.shadow2.action_log[v->arch.shadow2.action_index].rv = rv;
-}
-
-/* Set which mode we're in */
-static inline void sh2_set_log_mode(struct vcpu *v, enum sh2_log_type t)
-{
- v->arch.shadow2.action_index = t;
-}
-
-/* Know not to take action, because we're only checking the mechanism */
-static inline int sh2_take_no_action(struct vcpu *v)
-{
- return (v->arch.shadow2.action_index == log_fast);
-}
-
-#else /* Non-paranoid mode: these logs do not exist */
-
-#define sh2_init_log(_v) do { (void)(_v); } while(0)
-#define sh2_set_log_mode(_v,_t) do { (void)(_v); } while(0)
-#define sh2_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
-#define sh2_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
-#define sh2_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
-#define sh2_take_no_action(_v) (((void)(_v)), 0)
-
-#endif /* SHADOW2_OPTIMIZATION_PARANOIA */
-
-
-/******************************************************************************
- * Macro for dealing with the naming of the internal names of the
- * shadow code's external entry points.
- */
-#define SHADOW2_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels) \
- name ## __shadow_ ## shadow_levels ## _guest_ ## guest_levels
-#define SHADOW2_INTERNAL_NAME(name, shadow_levels, guest_levels) \
- SHADOW2_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels)
-
-#if CONFIG_PAGING_LEVELS == 2
-#define GUEST_LEVELS 2
-#define SHADOW_LEVELS 2
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-#endif /* CONFIG_PAGING_LEVELS == 2 */
-
-#if CONFIG_PAGING_LEVELS == 3
-#define GUEST_LEVELS 2
-#define SHADOW_LEVELS 3
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-
-#define GUEST_LEVELS 3
-#define SHADOW_LEVELS 3
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-#endif /* CONFIG_PAGING_LEVELS == 3 */
-
-#if CONFIG_PAGING_LEVELS == 4
-#define GUEST_LEVELS 2
-#define SHADOW_LEVELS 3
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-
-#define GUEST_LEVELS 3
-#define SHADOW_LEVELS 3
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-
-#define GUEST_LEVELS 3
-#define SHADOW_LEVELS 4
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-
-#define GUEST_LEVELS 4
-#define SHADOW_LEVELS 4
-#include <asm/shadow2-multi.h>
-#undef GUEST_LEVELS
-#undef SHADOW_LEVELS
-#endif /* CONFIG_PAGING_LEVELS == 4 */
-
-
-/******************************************************************************
- * Various function declarations
- */
-
-/* x86 emulator support */
-extern struct x86_emulate_ops shadow2_emulator_ops;
-
-/* Hash table functions */
-mfn_t shadow2_hash_lookup(struct vcpu *v, unsigned long n, u8 t);
-void shadow2_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn);
-void shadow2_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn);
-
-/* shadow promotion */
-void shadow2_promote(struct vcpu *v, mfn_t gmfn, u32 type);
-void shadow2_demote(struct vcpu *v, mfn_t gmfn, u32 type);
-
-/* Shadow page allocation functions */
-void shadow2_prealloc(struct domain *d, unsigned int order);
-mfn_t shadow2_alloc(struct domain *d,
- u32 shadow_type,
- unsigned long backpointer);
-void shadow2_free(struct domain *d, mfn_t smfn);
-
-/* Function to convert a shadow to log-dirty */
-void shadow2_convert_to_log_dirty(struct vcpu *v, mfn_t smfn);
-
-/* Dispatcher function: call the per-mode function that will unhook the
- * non-Xen mappings in this top-level shadow mfn */
-void shadow2_unhook_mappings(struct vcpu *v, mfn_t smfn);
-
-/* Re-sync copies of PAE shadow L3 tables if they have been changed */
-void sh2_pae_recopy(struct domain *d);
-
-/* Install the xen mappings in various flavours of shadow */
-void sh2_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
-void sh2_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
-void sh2_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
-void sh2_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
-
-
-/******************************************************************************
- * MFN/page-info handling
- */
-
-// Override mfn_to_page from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef mfn_to_page
-#define mfn_to_page(_mfn) (frame_table + mfn_x(_mfn))
-
-// Override page_to_mfn from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
-
-// Override mfn_valid from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
-
-// Provide mfn_t-aware versions of common xen functions
-static inline void *
-sh2_map_domain_page(mfn_t mfn)
-{
- /* XXX Using the monitor-table as a map will happen here */
- return map_domain_page(mfn_x(mfn));
-}
-
-static inline void
-sh2_unmap_domain_page(void *p)
-{
- /* XXX Using the monitor-table as a map will happen here */
- unmap_domain_page(p);
-}
-
-static inline void *
-sh2_map_domain_page_global(mfn_t mfn)
-{
- /* XXX Using the monitor-table as a map will happen here */
- return map_domain_page_global(mfn_x(mfn));
-}
-
-static inline void
-sh2_unmap_domain_page_global(void *p)
-{
- /* XXX Using the monitor-table as a map will happen here */
- unmap_domain_page_global(p);
-}
-
-static inline int
-sh2_mfn_is_dirty(struct domain *d, mfn_t gmfn)
-/* Is this guest page dirty? Call only in log-dirty mode. */
-{
- unsigned long pfn;
- ASSERT(shadow2_mode_log_dirty(d));
- ASSERT(d->arch.shadow2.dirty_bitmap != NULL);
-
- /* We /really/ mean PFN here, even for non-translated guests. */
- pfn = get_gpfn_from_mfn(mfn_x(gmfn));
- if ( likely(VALID_M2P(pfn))
- && likely(pfn < d->arch.shadow2.dirty_bitmap_size)
- && test_bit(pfn, d->arch.shadow2.dirty_bitmap) )
- return 1;
-
- return 0;
-}
-
-static inline int
-sh2_mfn_is_a_page_table(mfn_t gmfn)
-{
- struct page_info *page = mfn_to_page(gmfn);
- struct domain *owner;
- unsigned long type_info;
-
- if ( !valid_mfn(gmfn) )
- return 0;
-
- owner = page_get_owner(page);
- if ( owner && shadow2_mode_refcounts(owner)
- && (page->count_info & PGC_page_table) )
- return 1;
-
- type_info = page->u.inuse.type_info & PGT_type_mask;
- return type_info && (type_info <= PGT_l4_page_table);
-}
-
-
-/**************************************************************************/
-/* Shadow-page refcounting. See comment in shadow2-common.c about the
- * use of struct page_info fields for shadow pages */
-
-void sh2_destroy_shadow(struct vcpu *v, mfn_t smfn);
-
-/* Increase the refcount of a shadow page. Arguments are the mfn to refcount,
- * and the physical address of the shadow entry that holds the ref (or zero
- * if the ref is held by something else) */
-static inline void sh2_get_ref(mfn_t smfn, paddr_t entry_pa)
-{
- u32 x, nx;
- struct page_info *page = mfn_to_page(smfn);
-
- ASSERT(mfn_valid(smfn));
-
- x = page->count_info & PGC_SH2_count_mask;
- nx = x + 1;
-
- if ( unlikely(nx & ~PGC_SH2_count_mask) )
- {
- SHADOW2_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
- page->u.inuse.type_info, mfn_x(smfn));
- domain_crash_synchronous();
- }
-
- /* Guarded by the shadow lock, so no need for atomic update */
- page->count_info &= ~PGC_SH2_count_mask;
- page->count_info |= nx;
-
- /* We remember the first shadow entry that points to each shadow. */
- if ( entry_pa != 0 && page->up == 0 )
- page->up = entry_pa;
-}
-
-
-/* Decrease the refcount of a shadow page. As for get_ref, takes the
- * physical address of the shadow entry that held this reference. */
-static inline void sh2_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
-{
- u32 x, nx;
- struct page_info *page = mfn_to_page(smfn);
-
- ASSERT(mfn_valid(smfn));
- ASSERT(page_get_owner(page) == NULL);
-
- /* If this is the entry in the up-pointer, remove it */
- if ( entry_pa != 0 && page->up == entry_pa )
- page->up = 0;
-
- x = page->count_info & PGC_SH2_count_mask;
- nx = x - 1;
-
- if ( unlikely(x == 0) )
- {
- SHADOW2_PRINTK("shadow ref underflow, smfn=%lx oc=%08x t=%"
- PRtype_info "\n",
- mfn_x(smfn),
- page->count_info & PGC_SH2_count_mask,
- page->u.inuse.type_info);
- domain_crash_synchronous();
- }
-
- /* Guarded by the shadow lock, so no need for atomic update */
- page->count_info &= ~PGC_SH2_count_mask;
- page->count_info |= nx;
-
- if ( unlikely(nx == 0) )
- sh2_destroy_shadow(v, smfn);
-}
-
-
-/* Pin a shadow page: take an extra refcount and set the pin bit. */
-static inline void sh2_pin(mfn_t smfn)
-{
- struct page_info *page;
-
- ASSERT(mfn_valid(smfn));
- page = mfn_to_page(smfn);
- if ( !(page->count_info & PGC_SH2_pinned) )
- {
- sh2_get_ref(smfn, 0);
- page->count_info |= PGC_SH2_pinned;
- }
-}
-
-/* Unpin a shadow page: unset the pin bit and release the extra ref. */
-static inline void sh2_unpin(struct vcpu *v, mfn_t smfn)
-{
- struct page_info *page;
-
- ASSERT(mfn_valid(smfn));
- page = mfn_to_page(smfn);
- if ( page->count_info & PGC_SH2_pinned )
- {
- page->count_info &= ~PGC_SH2_pinned;
- sh2_put_ref(v, smfn, 0);
- }
-}
-
-/**************************************************************************/
-/* Guest physmap (p2m) support */
-
-/* Read our own P2M table, checking in the linear pagetables first to be
- * sure that we will succeed. Call this function if you expect it to
- * fail often, as it avoids page faults. If you expect to succeed, use
- * vcpu_gfn_to_mfn, which copy_from_user()s the entry */
-static inline mfn_t
-vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn)
-{
- unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn];
-#if CONFIG_PAGING_LEVELS >= 4
- l4_pgentry_t *l4e;
- l3_pgentry_t *l3e;
-#endif
- l2_pgentry_t *l2e;
- l1_pgentry_t *l1e;
-
- ASSERT(current == v);
- if ( !shadow2_vcpu_mode_translate(v) )
- return _mfn(gfn);
-
-#if CONFIG_PAGING_LEVELS > 2
- if ( gfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) )
- /* This pfn is higher than the p2m map can hold */
- return _mfn(INVALID_MFN);
-#endif
-
- /* Walk the linear pagetables. Note that this is *not* the same as
- * the walk in sh2_gfn_to_mfn_foreign, which is walking the p2m map */
-#if CONFIG_PAGING_LEVELS >= 4
- l4e = __linear_l4_table + l4_linear_offset(entry_addr);
- if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
- l3e = __linear_l3_table + l3_linear_offset(entry_addr);
- if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-#endif
- l2e = __linear_l2_table + l2_linear_offset(entry_addr);
- if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
- l1e = __linear_l1_table + l1_linear_offset(entry_addr);
- if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-
- /* Safe to look at this part of the table */
- if ( l1e_get_flags(phys_to_machine_mapping[gfn]) & _PAGE_PRESENT )
- return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn]));
-
- return _mfn(INVALID_MFN);
-}
-
-
-#endif /* _XEN_SHADOW2_PRIVATE_H */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
+++ /dev/null
-/******************************************************************************
- * include/asm-x86/shadow2-types.h
- *
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
- * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
- * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _XEN_SHADOW2_TYPES_H
-#define _XEN_SHADOW2_TYPES_H
-
-// Map a shadow page
-static inline void *
-map_shadow_page(mfn_t smfn)
-{
- // XXX -- Possible optimization/measurement question for 32-bit and PAE
- // hypervisors:
- // How often is this smfn already available in the shadow linear
- // table? Might it be worth checking that table first,
- // presumably using the reverse map hint in the page_info of this
- // smfn, rather than calling map_domain_page()?
- //
- return sh2_map_domain_page(smfn);
-}
-
-// matching unmap for map_shadow_page()
-static inline void
-unmap_shadow_page(void *p)
-{
- sh2_unmap_domain_page(p);
-}
-
-/*
- * Define various types for handling pagetabels, based on these options:
- * SHADOW_PAGING_LEVELS : Number of levels of shadow pagetables
- * GUEST_PAGING_LEVELS : Number of levels of guest pagetables
- */
-
-#if (CONFIG_PAGING_LEVELS < SHADOW_PAGING_LEVELS)
-#error Cannot have more levels of shadow pagetables than host pagetables
-#endif
-
-#if (SHADOW_PAGING_LEVELS < GUEST_PAGING_LEVELS)
-#error Cannot have more levels of guest pagetables than shadow pagetables
-#endif
-
-#if SHADOW_PAGING_LEVELS == 2
-#define SHADOW_L1_PAGETABLE_ENTRIES 1024
-#define SHADOW_L2_PAGETABLE_ENTRIES 1024
-#define SHADOW_L1_PAGETABLE_SHIFT 12
-#define SHADOW_L2_PAGETABLE_SHIFT 22
-#endif
-
-#if SHADOW_PAGING_LEVELS == 3
-#define SHADOW_L1_PAGETABLE_ENTRIES 512
-#define SHADOW_L2_PAGETABLE_ENTRIES 512
-#define SHADOW_L3_PAGETABLE_ENTRIES 4
-#define SHADOW_L1_PAGETABLE_SHIFT 12
-#define SHADOW_L2_PAGETABLE_SHIFT 21
-#define SHADOW_L3_PAGETABLE_SHIFT 30
-#endif
-
-#if SHADOW_PAGING_LEVELS == 4
-#define SHADOW_L1_PAGETABLE_ENTRIES 512
-#define SHADOW_L2_PAGETABLE_ENTRIES 512
-#define SHADOW_L3_PAGETABLE_ENTRIES 512
-#define SHADOW_L4_PAGETABLE_ENTRIES 512
-#define SHADOW_L1_PAGETABLE_SHIFT 12
-#define SHADOW_L2_PAGETABLE_SHIFT 21
-#define SHADOW_L3_PAGETABLE_SHIFT 30
-#define SHADOW_L4_PAGETABLE_SHIFT 39
-#endif
-
-/* Types of the shadow page tables */
-typedef l1_pgentry_t shadow_l1e_t;
-typedef l2_pgentry_t shadow_l2e_t;
-#if SHADOW_PAGING_LEVELS >= 3
-typedef l3_pgentry_t shadow_l3e_t;
-#if SHADOW_PAGING_LEVELS >= 4
-typedef l4_pgentry_t shadow_l4e_t;
-#endif
-#endif
-
-/* Access functions for them */
-static inline paddr_t shadow_l1e_get_paddr(shadow_l1e_t sl1e)
-{ return l1e_get_paddr(sl1e); }
-static inline paddr_t shadow_l2e_get_paddr(shadow_l2e_t sl2e)
-{ return l2e_get_paddr(sl2e); }
-#if SHADOW_PAGING_LEVELS >= 3
-static inline paddr_t shadow_l3e_get_paddr(shadow_l3e_t sl3e)
-{ return l3e_get_paddr(sl3e); }
-#if SHADOW_PAGING_LEVELS >= 4
-static inline paddr_t shadow_l4e_get_paddr(shadow_l4e_t sl4e)
-{ return l4e_get_paddr(sl4e); }
-#endif
-#endif
-
-static inline mfn_t shadow_l1e_get_mfn(shadow_l1e_t sl1e)
-{ return _mfn(l1e_get_pfn(sl1e)); }
-static inline mfn_t shadow_l2e_get_mfn(shadow_l2e_t sl2e)
-{ return _mfn(l2e_get_pfn(sl2e)); }
-#if SHADOW_PAGING_LEVELS >= 3
-static inline mfn_t shadow_l3e_get_mfn(shadow_l3e_t sl3e)
-{ return _mfn(l3e_get_pfn(sl3e)); }
-#if SHADOW_PAGING_LEVELS >= 4
-static inline mfn_t shadow_l4e_get_mfn(shadow_l4e_t sl4e)
-{ return _mfn(l4e_get_pfn(sl4e)); }
-#endif
-#endif
-
-static inline u32 shadow_l1e_get_flags(shadow_l1e_t sl1e)
-{ return l1e_get_flags(sl1e); }
-static inline u32 shadow_l2e_get_flags(shadow_l2e_t sl2e)
-{ return l2e_get_flags(sl2e); }
-#if SHADOW_PAGING_LEVELS >= 3
-static inline u32 shadow_l3e_get_flags(shadow_l3e_t sl3e)
-{ return l3e_get_flags(sl3e); }
-#if SHADOW_PAGING_LEVELS >= 4
-static inline u32 shadow_l4e_get_flags(shadow_l4e_t sl4e)
-{ return l4e_get_flags(sl4e); }
-#endif
-#endif
-
-static inline shadow_l1e_t
-shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags)
-{ l1e_remove_flags(sl1e, flags); return sl1e; }
-
-static inline shadow_l1e_t shadow_l1e_empty(void)
-{ return l1e_empty(); }
-static inline shadow_l2e_t shadow_l2e_empty(void)
-{ return l2e_empty(); }
-#if SHADOW_PAGING_LEVELS >= 3
-static inline shadow_l3e_t shadow_l3e_empty(void)
-{ return l3e_empty(); }
-#if SHADOW_PAGING_LEVELS >= 4
-static inline shadow_l4e_t shadow_l4e_empty(void)
-{ return l4e_empty(); }
-#endif
-#endif
-
-static inline shadow_l1e_t shadow_l1e_from_mfn(mfn_t mfn, u32 flags)
-{ return l1e_from_pfn(mfn_x(mfn), flags); }
-static inline shadow_l2e_t shadow_l2e_from_mfn(mfn_t mfn, u32 flags)
-{ return l2e_from_pfn(mfn_x(mfn), flags); }
-#if SHADOW_PAGING_LEVELS >= 3
-static inline shadow_l3e_t shadow_l3e_from_mfn(mfn_t mfn, u32 flags)
-{ return l3e_from_pfn(mfn_x(mfn), flags); }
-#if SHADOW_PAGING_LEVELS >= 4
-static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
-{ return l4e_from_pfn(mfn_x(mfn), flags); }
-#endif
-#endif
-
-#define shadow_l1_table_offset(a) l1_table_offset(a)
-#define shadow_l2_table_offset(a) l2_table_offset(a)
-#define shadow_l3_table_offset(a) l3_table_offset(a)
-#define shadow_l4_table_offset(a) l4_table_offset(a)
-
-/**************************************************************************/
-/* Access to the linear mapping of shadow page tables. */
-
-/* Offsets into each level of the linear mapping for a virtual address. */
-#define shadow_l1_linear_offset(_a) \
- (((_a) & VADDR_MASK) >> SHADOW_L1_PAGETABLE_SHIFT)
-#define shadow_l2_linear_offset(_a) \
- (((_a) & VADDR_MASK) >> SHADOW_L2_PAGETABLE_SHIFT)
-#define shadow_l3_linear_offset(_a) \
- (((_a) & VADDR_MASK) >> SHADOW_L3_PAGETABLE_SHIFT)
-#define shadow_l4_linear_offset(_a) \
- (((_a) & VADDR_MASK) >> SHADOW_L4_PAGETABLE_SHIFT)
-
-/* Where to find each level of the linear mapping. For PV guests, we use
- * the shadow linear-map self-entry as many times as we need. For HVM
- * guests, the shadow doesn't have a linear-map self-entry so we must use
- * the monitor-table's linear-map entry N-1 times and then the shadow-map
- * entry once. */
-#define __sh2_linear_l1_table ((shadow_l1e_t *)(SH_LINEAR_PT_VIRT_START))
-#define __sh2_linear_l2_table ((shadow_l2e_t *) \
- (__sh2_linear_l1_table + shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)))
-
-// shadow linear L3 and L4 tables only exist in 4 level paging...
-#if SHADOW_PAGING_LEVELS == 4
-#define __sh2_linear_l3_table ((shadow_l3e_t *) \
- (__sh2_linear_l2_table + shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)))
-#define __sh2_linear_l4_table ((shadow_l4e_t *) \
- (__sh2_linear_l3_table + shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START)))
-#endif
-
-#define sh2_linear_l1_table(v) ({ \
- ASSERT(current == (v)); \
- __sh2_linear_l1_table; \
-})
-
-#define sh2_linear_l2_table(v) ({ \
- ASSERT(current == (v)); \
- ((shadow_l2e_t *) \
- (hvm_guest(v) ? __linear_l1_table : __sh2_linear_l1_table) + \
- shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
-})
-
-// shadow linear L3 and L4 tables only exist in 4 level paging...
-#if SHADOW_PAGING_LEVELS == 4
-#define sh2_linear_l3_table(v) ({ \
- ASSERT(current == (v)); \
- ((shadow_l3e_t *) \
- (hvm_guest(v) ? __linear_l2_table : __sh2_linear_l2_table) + \
- shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)); \
-})
-
-// we use l4_pgentry_t instead of shadow_l4e_t below because shadow_l4e_t is
-// not defined for when xen_levels==4 & shadow_levels==3...
-#define sh2_linear_l4_table(v) ({ \
- ASSERT(current == (v)); \
- ((l4_pgentry_t *) \
- (hvm_guest(v) ? __linear_l3_table : __sh2_linear_l3_table) + \
- shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START)); \
-})
-#endif
-
-#if GUEST_PAGING_LEVELS == 2
-
-#include <asm/page-guest32.h>
-
-#define GUEST_L1_PAGETABLE_ENTRIES 1024
-#define GUEST_L2_PAGETABLE_ENTRIES 1024
-#define GUEST_L1_PAGETABLE_SHIFT 12
-#define GUEST_L2_PAGETABLE_SHIFT 22
-
-/* Type of the guest's frame numbers */
-TYPE_SAFE(u32,gfn)
-#define INVALID_GFN ((u32)(-1u))
-#define SH2_PRI_gfn "05x"
-
-/* Types of the guest's page tables */
-typedef l1_pgentry_32_t guest_l1e_t;
-typedef l2_pgentry_32_t guest_l2e_t;
-
-/* Access functions for them */
-static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
-{ return l1e_get_paddr_32(gl1e); }
-static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
-{ return l2e_get_paddr_32(gl2e); }
-
-static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
-{ return _gfn(l1e_get_paddr_32(gl1e) >> PAGE_SHIFT); }
-static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
-{ return _gfn(l2e_get_paddr_32(gl2e) >> PAGE_SHIFT); }
-
-static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
-{ return l1e_get_flags_32(gl1e); }
-static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
-{ return l2e_get_flags_32(gl2e); }
-
-static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
-{ l1e_add_flags_32(gl1e, flags); return gl1e; }
-static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
-{ l2e_add_flags_32(gl2e, flags); return gl2e; }
-
-static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
-{ return l1e_from_pfn_32(gfn_x(gfn), flags); }
-static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
-{ return l2e_from_pfn_32(gfn_x(gfn), flags); }
-
-#define guest_l1_table_offset(a) l1_table_offset_32(a)
-#define guest_l2_table_offset(a) l2_table_offset_32(a)
-
-/* The shadow types needed for the various levels. */
-#define PGC_SH2_l1_shadow PGC_SH2_l1_32_shadow
-#define PGC_SH2_l2_shadow PGC_SH2_l2_32_shadow
-#define PGC_SH2_fl1_shadow PGC_SH2_fl1_32_shadow
-
-#else /* GUEST_PAGING_LEVELS != 2 */
-
-#if GUEST_PAGING_LEVELS == 3
-#define GUEST_L1_PAGETABLE_ENTRIES 512
-#define GUEST_L2_PAGETABLE_ENTRIES 512
-#define GUEST_L3_PAGETABLE_ENTRIES 4
-#define GUEST_L1_PAGETABLE_SHIFT 12
-#define GUEST_L2_PAGETABLE_SHIFT 21
-#define GUEST_L3_PAGETABLE_SHIFT 30
-#else /* GUEST_PAGING_LEVELS == 4 */
-#define GUEST_L1_PAGETABLE_ENTRIES 512
-#define GUEST_L2_PAGETABLE_ENTRIES 512
-#define GUEST_L3_PAGETABLE_ENTRIES 512
-#define GUEST_L4_PAGETABLE_ENTRIES 512
-#define GUEST_L1_PAGETABLE_SHIFT 12
-#define GUEST_L2_PAGETABLE_SHIFT 21
-#define GUEST_L3_PAGETABLE_SHIFT 30
-#define GUEST_L4_PAGETABLE_SHIFT 39
-#endif
-
-/* Type of the guest's frame numbers */
-TYPE_SAFE(unsigned long,gfn)
-#define INVALID_GFN ((unsigned long)(-1ul))
-#define SH2_PRI_gfn "05lx"
-
-/* Types of the guest's page tables */
-typedef l1_pgentry_t guest_l1e_t;
-typedef l2_pgentry_t guest_l2e_t;
-typedef l3_pgentry_t guest_l3e_t;
-#if GUEST_PAGING_LEVELS >= 4
-typedef l4_pgentry_t guest_l4e_t;
-#endif
-
-/* Access functions for them */
-static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
-{ return l1e_get_paddr(gl1e); }
-static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
-{ return l2e_get_paddr(gl2e); }
-static inline paddr_t guest_l3e_get_paddr(guest_l3e_t gl3e)
-{ return l3e_get_paddr(gl3e); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline paddr_t guest_l4e_get_paddr(guest_l4e_t gl4e)
-{ return l4e_get_paddr(gl4e); }
-#endif
-
-static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
-{ return _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT); }
-static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
-{ return _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT); }
-static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e)
-{ return _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e)
-{ return _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT); }
-#endif
-
-static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
-{ return l1e_get_flags(gl1e); }
-static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
-{ return l2e_get_flags(gl2e); }
-static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e)
-{ return l3e_get_flags(gl3e); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
-{ return l4e_get_flags(gl4e); }
-#endif
-
-static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
-{ l1e_add_flags(gl1e, flags); return gl1e; }
-static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
-{ l2e_add_flags(gl2e, flags); return gl2e; }
-static inline guest_l3e_t guest_l3e_add_flags(guest_l3e_t gl3e, u32 flags)
-{ l3e_add_flags(gl3e, flags); return gl3e; }
-#if GUEST_PAGING_LEVELS >= 4
-static inline guest_l4e_t guest_l4e_add_flags(guest_l4e_t gl4e, u32 flags)
-{ l4e_add_flags(gl4e, flags); return gl4e; }
-#endif
-
-static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
-{ return l1e_from_pfn(gfn_x(gfn), flags); }
-static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
-{ return l2e_from_pfn(gfn_x(gfn), flags); }
-static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags)
-{ return l3e_from_pfn(gfn_x(gfn), flags); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
-{ return l4e_from_pfn(gfn_x(gfn), flags); }
-#endif
-
-#define guest_l1_table_offset(a) l1_table_offset(a)
-#define guest_l2_table_offset(a) l2_table_offset(a)
-#define guest_l3_table_offset(a) l3_table_offset(a)
-#define guest_l4_table_offset(a) l4_table_offset(a)
-
-/* The shadow types needed for the various levels. */
-#if GUEST_PAGING_LEVELS == 3
-#define PGC_SH2_l1_shadow PGC_SH2_l1_pae_shadow
-#define PGC_SH2_fl1_shadow PGC_SH2_fl1_pae_shadow
-#define PGC_SH2_l2_shadow PGC_SH2_l2_pae_shadow
-#define PGC_SH2_l2h_shadow PGC_SH2_l2h_pae_shadow
-#define PGC_SH2_l3_shadow PGC_SH2_l3_pae_shadow
-#else
-#define PGC_SH2_l1_shadow PGC_SH2_l1_64_shadow
-#define PGC_SH2_fl1_shadow PGC_SH2_fl1_64_shadow
-#define PGC_SH2_l2_shadow PGC_SH2_l2_64_shadow
-#define PGC_SH2_l3_shadow PGC_SH2_l3_64_shadow
-#define PGC_SH2_l4_shadow PGC_SH2_l4_64_shadow
-#endif
-
-#endif /* GUEST_PAGING_LEVELS != 2 */
-
-#define VALID_GFN(m) (m != INVALID_GFN)
-
-static inline int
-valid_gfn(gfn_t m)
-{
- return VALID_GFN(gfn_x(m));
-}
-
-#if GUEST_PAGING_LEVELS == 2
-#define PGC_SH2_guest_root_type PGC_SH2_l2_32_shadow
-#elif GUEST_PAGING_LEVELS == 3
-#define PGC_SH2_guest_root_type PGC_SH2_l3_pae_shadow
-#else
-#define PGC_SH2_guest_root_type PGC_SH2_l4_64_shadow
-#endif
-
-/* Translation between mfns and gfns */
-static inline mfn_t
-vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
-{
- return sh2_vcpu_gfn_to_mfn(v, gfn_x(gfn));
-}
-
-static inline gfn_t
-mfn_to_gfn(struct domain *d, mfn_t mfn)
-{
- return _gfn(sh2_mfn_to_gfn(d, mfn));
-}
-
-static inline paddr_t
-gfn_to_paddr(gfn_t gfn)
-{
- return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT;
-}
-
-/* Type used for recording a walk through guest pagetables. It is
- * filled in by the pagetable walk function, and also used as a cache
- * for later walks.
- * Any non-null pointer in this structure represents a mapping of guest
- * memory. We must always call walk_init() before using a walk_t, and
- * call walk_unmap() when we're done.
- * The "Effective l1e" field is used when there isn't an l1e to point to,
- * but we have fabricated an l1e for propagation to the shadow (e.g.,
- * for splintering guest superpages into many shadow l1 entries). */
-typedef struct shadow2_walk_t walk_t;
-struct shadow2_walk_t
-{
- unsigned long va; /* Address we were looking for */
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
- guest_l4e_t *l4e; /* Pointer to guest's level 4 entry */
-#endif
- guest_l3e_t *l3e; /* Pointer to guest's level 3 entry */
-#endif
- guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */
- guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */
- guest_l1e_t eff_l1e; /* Effective level 1 entry */
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
- mfn_t l4mfn; /* MFN that the level 4 entry is in */
-#endif
- mfn_t l3mfn; /* MFN that the level 3 entry is in */
-#endif
- mfn_t l2mfn; /* MFN that the level 2 entry is in */
- mfn_t l1mfn; /* MFN that the level 1 entry is in */
-};
-
-/* macros for dealing with the naming of the internal function names of the
- * shadow code's external entry points.
- */
-#define INTERNAL_NAME(name) \
- SHADOW2_INTERNAL_NAME(name, SHADOW_PAGING_LEVELS, GUEST_PAGING_LEVELS)
-
-/* macros for renaming the primary entry points, so that they are more
- * easily distinguished from a debugger
- */
-#define sh2_page_fault INTERNAL_NAME(sh2_page_fault)
-#define sh2_invlpg INTERNAL_NAME(sh2_invlpg)
-#define sh2_gva_to_gpa INTERNAL_NAME(sh2_gva_to_gpa)
-#define sh2_gva_to_gfn INTERNAL_NAME(sh2_gva_to_gfn)
-#define sh2_update_cr3 INTERNAL_NAME(sh2_update_cr3)
-#define sh2_remove_write_access INTERNAL_NAME(sh2_remove_write_access)
-#define sh2_remove_all_mappings INTERNAL_NAME(sh2_remove_all_mappings)
-#define sh2_remove_l1_shadow INTERNAL_NAME(sh2_remove_l1_shadow)
-#define sh2_remove_l2_shadow INTERNAL_NAME(sh2_remove_l2_shadow)
-#define sh2_remove_l3_shadow INTERNAL_NAME(sh2_remove_l3_shadow)
-#define sh2_map_and_validate_gl4e INTERNAL_NAME(sh2_map_and_validate_gl4e)
-#define sh2_map_and_validate_gl3e INTERNAL_NAME(sh2_map_and_validate_gl3e)
-#define sh2_map_and_validate_gl2e INTERNAL_NAME(sh2_map_and_validate_gl2e)
-#define sh2_map_and_validate_gl2he INTERNAL_NAME(sh2_map_and_validate_gl2he)
-#define sh2_map_and_validate_gl1e INTERNAL_NAME(sh2_map_and_validate_gl1e)
-#define sh2_destroy_l4_shadow INTERNAL_NAME(sh2_destroy_l4_shadow)
-#define sh2_destroy_l3_shadow INTERNAL_NAME(sh2_destroy_l3_shadow)
-#define sh2_destroy_l3_subshadow INTERNAL_NAME(sh2_destroy_l3_subshadow)
-#define sh2_unpin_all_l3_subshadows INTERNAL_NAME(sh2_unpin_all_l3_subshadows)
-#define sh2_destroy_l2_shadow INTERNAL_NAME(sh2_destroy_l2_shadow)
-#define sh2_destroy_l1_shadow INTERNAL_NAME(sh2_destroy_l1_shadow)
-#define sh2_unhook_32b_mappings INTERNAL_NAME(sh2_unhook_32b_mappings)
-#define sh2_unhook_pae_mappings INTERNAL_NAME(sh2_unhook_pae_mappings)
-#define sh2_unhook_64b_mappings INTERNAL_NAME(sh2_unhook_64b_mappings)
-#define sh2_paging_mode INTERNAL_NAME(sh2_paging_mode)
-#define sh2_detach_old_tables INTERNAL_NAME(sh2_detach_old_tables)
-#define sh2_x86_emulate_write INTERNAL_NAME(sh2_x86_emulate_write)
-#define sh2_x86_emulate_cmpxchg INTERNAL_NAME(sh2_x86_emulate_cmpxchg)
-#define sh2_x86_emulate_cmpxchg8b INTERNAL_NAME(sh2_x86_emulate_cmpxchg8b)
-#define sh2_audit_l1_table INTERNAL_NAME(sh2_audit_l1_table)
-#define sh2_audit_fl1_table INTERNAL_NAME(sh2_audit_fl1_table)
-#define sh2_audit_l2_table INTERNAL_NAME(sh2_audit_l2_table)
-#define sh2_audit_l3_table INTERNAL_NAME(sh2_audit_l3_table)
-#define sh2_audit_l4_table INTERNAL_NAME(sh2_audit_l4_table)
-#define sh2_guess_wrmap INTERNAL_NAME(sh2_guess_wrmap)
-#define sh2_clear_shadow_entry INTERNAL_NAME(sh2_clear_shadow_entry)
-
-/* sh2_make_monitor_table only depends on the number of shadow levels */
-#define sh2_make_monitor_table \
- SHADOW2_INTERNAL_NAME(sh2_make_monitor_table, \
- SHADOW_PAGING_LEVELS, \
- SHADOW_PAGING_LEVELS)
-#define sh2_destroy_monitor_table \
- SHADOW2_INTERNAL_NAME(sh2_destroy_monitor_table, \
- SHADOW_PAGING_LEVELS, \
- SHADOW_PAGING_LEVELS)
-
-
-#if GUEST_PAGING_LEVELS == 3
-/*
- * Accounting information stored in the shadow of PAE Guest L3 pages.
- * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
- * various refcounts, etc., on the page_info of their page. We provide extra
- * bookkeeping space in the shadow itself, and this is the structure
- * definition for that bookkeeping information.
- */
-struct pae_l3_bookkeeping {
- u32 vcpus; /* bitmap of which vcpus are currently storing
- * copies of this 32-byte page */
- u32 refcount; /* refcount for this 32-byte page */
- u8 pinned; /* is this 32-byte page pinned or not? */
-};
-
-// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
-#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *) \
- (((unsigned long)(_ptr) & ~31) + 32))
-
-static void sh2_destroy_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e);
-
-/* Increment a subshadow ref
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh2_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
- /* First ref to the subshadow takes a ref to the full shadow */
- if ( bk->refcount == 0 )
- sh2_get_ref(smfn, 0);
- if ( unlikely(++(bk->refcount) == 0) )
- {
- SHADOW2_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH2_PRI_mfn " sh=%p\n",
- mfn_x(smfn), sl3e);
- domain_crash_synchronous();
- }
-}
-
-/* Decrement a subshadow ref.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. Calling this may cause the
- * entire shadow to disappear, so the caller must immediately unmap
- * the pointer after calling. */
-static inline void sh2_put_ref_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e,
- mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk;
-
- bk = sl3p_to_info(sl3e);
-
- ASSERT(bk->refcount > 0);
- if ( --(bk->refcount) == 0 )
- {
- /* Need to destroy this subshadow */
- sh2_destroy_l3_subshadow(v, sl3e);
- /* Last ref to the subshadow had a ref to the full shadow */
- sh2_put_ref(v, smfn, 0);
- }
-}
-
-/* Pin a subshadow
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh2_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
- debugtrace_printk("%s smfn=%05lx offset=%ld\n",
- __func__, mfn_x(smfn),
- ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
- if ( !bk->pinned )
- {
- bk->pinned = 1;
- sh2_get_ref_l3_subshadow(sl3e, smfn);
- }
-}
-
-/* Unpin a sub-shadow.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. Calling this may cause the
- * entire shadow to disappear, so the caller must immediately unmap
- * the pointer after calling. */
-static inline void sh2_unpin_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e,
- mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
- debugtrace_printk("%s smfn=%05lx offset=%ld\n",
- __func__, mfn_x(smfn),
- ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
- if ( bk->pinned )
- {
- bk->pinned = 0;
- sh2_put_ref_l3_subshadow(v, sl3e, smfn);
- }
-}
-
-#endif /* GUEST_PAGING_LEVELS == 3 */
-
-#if SHADOW_PAGING_LEVELS == 3
-#define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
-#endif
-
-#if SHADOW_PAGING_LEVELS == 2
-#define SH2_PRI_pte "08x"
-#else /* SHADOW_PAGING_LEVELS >= 3 */
-#ifndef __x86_64__
-#define SH2_PRI_pte "016llx"
-#else
-#define SH2_PRI_pte "016lx"
-#endif
-#endif /* SHADOW_PAGING_LEVELS >= 3 */
-
-#if GUEST_PAGING_LEVELS == 2
-#define SH2_PRI_gpte "08x"
-#else /* GUEST_PAGING_LEVELS >= 3 */
-#ifndef __x86_64__
-#define SH2_PRI_gpte "016llx"
-#else
-#define SH2_PRI_gpte "016lx"
-#endif
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-static inline u32
-accumulate_guest_flags(walk_t *gw)
-{
- u32 accumulated_flags;
-
- // We accumulate the permission flags with bitwise ANDing.
- // This works for the PRESENT bit, RW bit, and USER bit.
- // For the NX bit, however, the polarity is wrong, so we accumulate the
- // inverse of the NX bit.
- //
- accumulated_flags = guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
- accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
-
- // Note that PAE guests do not have USER or RW or NX bits in their L3s.
- //
-#if GUEST_PAGING_LEVELS == 3
- accumulated_flags &=
- ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
-#elif GUEST_PAGING_LEVELS >= 4
- accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
- accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
-#endif
-
- // Finally, revert the NX bit back to its original polarity
- accumulated_flags ^= _PAGE_NX_BIT;
-
- return accumulated_flags;
-}
-
-#endif /* _XEN_SHADOW2_TYPES_H */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
+++ /dev/null
-/******************************************************************************
- * include/asm-x86/shadow2.h
- *
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
- * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
- * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _XEN_SHADOW2_H
-#define _XEN_SHADOW2_H
-
-#include <public/domctl.h>
-#include <xen/sched.h>
-#include <xen/perfc.h>
-#include <asm/flushtlb.h>
-
-/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
-
-#define SHM2_shift 10
-/* We're in one of the shadow modes */
-#define SHM2_enable (1U << SHM2_shift)
-/* Refcounts based on shadow tables instead of guest tables */
-#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift)
-/* Enable log dirty mode */
-#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift)
-/* Xen does p2m translation, not guest */
-#define SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift)
-/* Xen does not steal address space from the domain for its own booking;
- * requires VT or similar mechanisms */
-#define SHM2_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift)
-
-#define shadow2_mode_enabled(_d) ((_d)->arch.shadow2.mode)
-#define shadow2_mode_refcounts(_d) ((_d)->arch.shadow2.mode & SHM2_refcounts)
-#define shadow2_mode_log_dirty(_d) ((_d)->arch.shadow2.mode & SHM2_log_dirty)
-#define shadow2_mode_translate(_d) ((_d)->arch.shadow2.mode & SHM2_translate)
-#define shadow2_mode_external(_d) ((_d)->arch.shadow2.mode & SHM2_external)
-
-/* Xen traps & emulates all reads of all page table pages:
- *not yet supported
- */
-#define shadow2_mode_trap_reads(_d) ({ (void)(_d); 0; })
-
-// flags used in the return value of the shadow_set_lXe() functions...
-#define SHADOW2_SET_CHANGED 0x1
-#define SHADOW2_SET_FLUSH 0x2
-#define SHADOW2_SET_ERROR 0x4
-#define SHADOW2_SET_L3PAE_RECOPY 0x8
-
-// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
-#ifdef __x86_64__
-#define pv_32bit_guest(_v) 0 // not yet supported
-#else
-#define pv_32bit_guest(_v) !hvm_guest(v)
-#endif
-
-/* The shadow2 lock.
- *
- * This lock is per-domain. It is intended to allow us to make atomic
- * updates to the software TLB that the shadow tables provide.
- *
- * Specifically, it protects:
- * - all changes to shadow page table pages
- * - the shadow hash table
- * - the shadow page allocator
- * - all changes to guest page table pages; if/when the notion of
- * out-of-sync pages is added to this code, then the shadow lock is
- * protecting all guest page table pages which are not listed as
- * currently as both guest-writable and out-of-sync...
- * XXX -- need to think about this relative to writable page tables.
- * - all changes to the page_info->tlbflush_timestamp
- * - the page_info->count fields on shadow pages
- * - the shadow dirty bit array and count
- * - XXX
- */
-#ifndef CONFIG_SMP
-#error shadow2.h currently requires CONFIG_SMP
-#endif
-
-#define shadow2_lock_init(_d) \
- do { \
- spin_lock_init(&(_d)->arch.shadow2.lock); \
- (_d)->arch.shadow2.locker = -1; \
- (_d)->arch.shadow2.locker_function = "nobody"; \
- } while (0)
-
-#define shadow2_lock_is_acquired(_d) \
- (current->processor == (_d)->arch.shadow2.locker)
-
-#define shadow2_lock(_d) \
- do { \
- if ( unlikely((_d)->arch.shadow2.locker == current->processor) ) \
- { \
- printk("Error: shadow2 lock held by %s\n", \
- (_d)->arch.shadow2.locker_function); \
- BUG(); \
- } \
- spin_lock(&(_d)->arch.shadow2.lock); \
- ASSERT((_d)->arch.shadow2.locker == -1); \
- (_d)->arch.shadow2.locker = current->processor; \
- (_d)->arch.shadow2.locker_function = __func__; \
- } while (0)
-
-#define shadow2_unlock(_d) \
- do { \
- ASSERT((_d)->arch.shadow2.locker == current->processor); \
- (_d)->arch.shadow2.locker = -1; \
- (_d)->arch.shadow2.locker_function = "nobody"; \
- spin_unlock(&(_d)->arch.shadow2.lock); \
- } while (0)
-
-/*
- * Levels of self-test and paranoia
- * XXX should go in config files somewhere?
- */
-#define SHADOW2_AUDIT_HASH 0x01 /* Check current hash bucket */
-#define SHADOW2_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */
-#define SHADOW2_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */
-#define SHADOW2_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */
-#define SHADOW2_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */
-#define SHADOW2_AUDIT_P2M 0x20 /* Check the p2m table */
-
-#ifdef NDEBUG
-#define SHADOW2_AUDIT 0
-#define SHADOW2_AUDIT_ENABLE 0
-#else
-#define SHADOW2_AUDIT 0x15 /* Basic audit of all except p2m. */
-#define SHADOW2_AUDIT_ENABLE shadow2_audit_enable
-extern int shadow2_audit_enable;
-#endif
-
-/*
- * Levels of optimization
- * XXX should go in config files somewhere?
- */
-#define SH2OPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
-#define SH2OPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
-
-#define SHADOW2_OPTIMIZATIONS 0x03
-
-
-/* With shadow pagetables, the different kinds of address start
- * to get get confusing.
- *
- * Virtual addresses are what they usually are: the addresses that are used
- * to accessing memory while the guest is running. The MMU translates from
- * virtual addresses to machine addresses.
- *
- * (Pseudo-)physical addresses are the abstraction of physical memory the
- * guest uses for allocation and so forth. For the purposes of this code,
- * we can largely ignore them.
- *
- * Guest frame numbers (gfns) are the entries that the guest puts in its
- * pagetables. For normal paravirtual guests, they are actual frame numbers,
- * with the translation done by the guest.
- *
- * Machine frame numbers (mfns) are the entries that the hypervisor puts
- * in the shadow page tables.
- *
- * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
- * to a "machine frame number, from the guest's perspective", or in other
- * words, pseudo-physical frame numbers. However, in the shadow code, the
- * term "gmfn" means "the mfn of a guest page"; this combines naturally with
- * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
- * guest L2 page), etc...
- */
-
-/* With this defined, we do some ugly things to force the compiler to
- * give us type safety between mfns and gfns and other integers.
- * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions
- * that translate beween int and foo_t.
- *
- * It does have some performance cost because the types now have
- * a different storage attribute, so may not want it on all the time. */
-#ifndef NDEBUG
-#define TYPE_SAFETY 1
-#endif
-
-#ifdef TYPE_SAFETY
-#define TYPE_SAFE(_type,_name) \
-typedef struct { _type _name; } _name##_t; \
-static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
-static inline _type _name##_x(_name##_t n) { return n._name; }
-#else
-#define TYPE_SAFE(_type,_name) \
-typedef _type _name##_t; \
-static inline _name##_t _##_name(_type n) { return n; } \
-static inline _type _name##_x(_name##_t n) { return n; }
-#endif
-
-TYPE_SAFE(unsigned long,mfn)
-#define SH2_PRI_mfn "05lx"
-
-static inline int
-valid_mfn(mfn_t m)
-{
- return VALID_MFN(mfn_x(m));
-}
-
-static inline mfn_t
-pagetable_get_mfn(pagetable_t pt)
-{
- return _mfn(pagetable_get_pfn(pt));
-}
-
-static inline pagetable_t
-pagetable_from_mfn(mfn_t mfn)
-{
- return pagetable_from_pfn(mfn_x(mfn));
-}
-
-static inline int
-shadow2_vcpu_mode_translate(struct vcpu *v)
-{
- // Returns true if this VCPU needs to be using the P2M table to translate
- // between GFNs and MFNs.
- //
- // This is true of translated HVM domains on a vcpu which has paging
- // enabled. (HVM vcpu's with paging disabled are using the p2m table as
- // its paging table, so no translation occurs in this case.)
- //
- return v->arch.shadow2.hvm_paging_enabled;
-}
-
-
-/**************************************************************************/
-/* Mode-specific entry points into the shadow code */
-
-struct x86_emulate_ctxt;
-struct shadow2_paging_mode {
- int (*page_fault )(struct vcpu *v, unsigned long va,
- struct cpu_user_regs *regs);
- int (*invlpg )(struct vcpu *v, unsigned long va);
- unsigned long (*gva_to_gpa )(struct vcpu *v, unsigned long va);
- unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va);
- void (*update_cr3 )(struct vcpu *v);
- int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry, u32 size);
- int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry, u32 size);
- int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry, u32 size);
- int (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry, u32 size);
- int (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry, u32 size);
- void (*detach_old_tables )(struct vcpu *v);
- int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
- void *src, u32 bytes,
- struct x86_emulate_ctxt *ctxt);
- int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va,
- unsigned long old,
- unsigned long new,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt);
- int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
- unsigned long old_lo,
- unsigned long old_hi,
- unsigned long new_lo,
- unsigned long new_hi,
- struct x86_emulate_ctxt *ctxt);
- mfn_t (*make_monitor_table )(struct vcpu *v);
- void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_WRITABLE_HEURISTIC
- int (*guess_wrmap )(struct vcpu *v,
- unsigned long vaddr, mfn_t gmfn);
-#endif
- /* For outsiders to tell what mode we're in */
- unsigned int shadow_levels;
- unsigned int guest_levels;
-};
-
-static inline int shadow2_guest_paging_levels(struct vcpu *v)
-{
- ASSERT(v->arch.shadow2.mode != NULL);
- return v->arch.shadow2.mode->guest_levels;
-}
-
-/**************************************************************************/
-/* Entry points into the shadow code */
-
-/* Turning on shadow2 test mode */
-int shadow2_test_enable(struct domain *d);
-
-/* Handler for shadow control ops: enabling and disabling shadow modes,
- * and log-dirty bitmap ops all happen through here. */
-int shadow2_domctl(struct domain *d,
- xen_domctl_shadow_op_t *sc,
- XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
-
-/* Call when destroying a domain */
-void shadow2_teardown(struct domain *d);
-
-/* Call once all of the references to the domain have gone away */
-void shadow2_final_teardown(struct domain *d);
-
-
-/* Mark a page as dirty in the bitmap */
-void sh2_do_mark_dirty(struct domain *d, mfn_t gmfn);
-static inline void mark_dirty(struct domain *d, unsigned long gmfn)
-{
- if ( shadow2_mode_log_dirty(d) )
- {
- shadow2_lock(d);
- sh2_do_mark_dirty(d, _mfn(gmfn));
- shadow2_unlock(d);
- }
-}
-
-/* Internal version, for when the shadow lock is already held */
-static inline void sh2_mark_dirty(struct domain *d, mfn_t gmfn)
-{
- ASSERT(shadow2_lock_is_acquired(d));
- if ( shadow2_mode_log_dirty(d) )
- sh2_do_mark_dirty(d, gmfn);
-}
-
-static inline int
-shadow2_fault(unsigned long va, struct cpu_user_regs *regs)
-/* Called from pagefault handler in Xen, and from the HVM trap handlers
- * for pagefaults. Returns 1 if this fault was an artefact of the
- * shadow code (and the guest should retry) or 0 if it is not (and the
- * fault should be handled elsewhere or passed to the guest). */
-{
- struct vcpu *v = current;
- perfc_incrc(shadow2_fault);
- return v->arch.shadow2.mode->page_fault(v, va, regs);
-}
-
-static inline int
-shadow2_invlpg(struct vcpu *v, unsigned long va)
-/* Called when the guest requests an invlpg. Returns 1 if the invlpg
- * instruction should be issued on the hardware, or 0 if it's safe not
- * to do so. */
-{
- return v->arch.shadow2.mode->invlpg(v, va);
-}
-
-static inline unsigned long
-shadow2_gva_to_gpa(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
-{
- return v->arch.shadow2.mode->gva_to_gpa(v, va);
-}
-
-static inline unsigned long
-shadow2_gva_to_gfn(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
-{
- return v->arch.shadow2.mode->gva_to_gfn(v, va);
-}
-
-static inline void
-shadow2_update_cr3(struct vcpu *v)
-/* Updates all the things that are derived from the guest's CR3.
- * Called when the guest changes CR3. */
-{
- shadow2_lock(v->domain);
- v->arch.shadow2.mode->update_cr3(v);
- shadow2_unlock(v->domain);
-}
-
-
-/* Should be called after CR3 is updated.
- * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
- *
- * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
- * shadow_vtable, etc).
- *
- * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
- * for HVM guests, arch.monitor_table and hvm's guest CR3.
- *
- * Update ref counts to shadow tables appropriately.
- * For PAE, relocate L3 entries, if necessary, into low memory.
- */
-static inline void update_cr3(struct vcpu *v)
-{
- unsigned long cr3_mfn=0;
-
- if ( shadow2_mode_enabled(v->domain) )
- {
- shadow2_update_cr3(v);
- return;
- }
-
-#if CONFIG_PAGING_LEVELS == 4
- if ( !(v->arch.flags & TF_kernel_mode) )
- cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
- else
-#endif
- cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
-
- make_cr3(v, cr3_mfn);
-}
-
-extern void sh2_update_paging_modes(struct vcpu *v);
-
-/* Should be called to initialise paging structures if the paging mode
- * has changed, and when bringing up a VCPU for the first time. */
-static inline void shadow2_update_paging_modes(struct vcpu *v)
-{
- ASSERT(shadow2_mode_enabled(v->domain));
- shadow2_lock(v->domain);
- sh2_update_paging_modes(v);
- shadow2_unlock(v->domain);
-}
-
-static inline void
-shadow2_detach_old_tables(struct vcpu *v)
-{
- if ( v->arch.shadow2.mode )
- v->arch.shadow2.mode->detach_old_tables(v);
-}
-
-static inline mfn_t
-shadow2_make_monitor_table(struct vcpu *v)
-{
- return v->arch.shadow2.mode->make_monitor_table(v);
-}
-
-static inline void
-shadow2_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
-{
- v->arch.shadow2.mode->destroy_monitor_table(v, mmfn);
-}
-
-/* Validate a pagetable change from the guest and update the shadows. */
-extern int shadow2_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry);
-
-/* Update the shadows in response to a pagetable write from a HVM guest */
-extern void shadow2_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size);
-
-/* Remove all writeable mappings of a guest frame from the shadows.
- * Returns non-zero if we need to flush TLBs.
- * level and fault_addr desribe how we found this to be a pagetable;
- * level==0 means we have some other reason for revoking write access. */
-extern int shadow2_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
- unsigned int level,
- unsigned long fault_addr);
-
-/* Remove all mappings of the guest mfn from the shadows.
- * Returns non-zero if we need to flush TLBs. */
-extern int shadow2_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
-
-void
-shadow2_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
-/* This is a HVM page that we thing is no longer a pagetable.
- * Unshadow it, and recursively unshadow pages that reference it. */
-
-/* Remove all shadows of the guest mfn. */
-extern void sh2_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
-static inline void shadow2_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
-{
- sh2_remove_shadows(v, gmfn, 1);
-}
-
-/* Add a page to a domain */
-void
-shadow2_guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
-
-/* Remove a page from a domain */
-void
-shadow2_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
-
-/*
- * Definitions for the shadow2_flags field in page_info.
- * These flags are stored on *guest* pages...
- * Bits 1-13 are encodings for the shadow types.
- */
-#define PGC_SH2_type_to_index(_type) ((_type) >> PGC_SH2_type_shift)
-#define SH2F_page_type_mask \
- (((1u << (PGC_SH2_type_to_index(PGC_SH2_max_shadow) + 1u)) - 1u) - \
- ((1u << PGC_SH2_type_to_index(PGC_SH2_min_shadow)) - 1u))
-
-#define SH2F_L1_32 (1u << PGC_SH2_type_to_index(PGC_SH2_l1_32_shadow))
-#define SH2F_FL1_32 (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_32_shadow))
-#define SH2F_L2_32 (1u << PGC_SH2_type_to_index(PGC_SH2_l2_32_shadow))
-#define SH2F_L1_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l1_pae_shadow))
-#define SH2F_FL1_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_pae_shadow))
-#define SH2F_L2_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l2_pae_shadow))
-#define SH2F_L2H_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l2h_pae_shadow))
-#define SH2F_L3_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l3_pae_shadow))
-#define SH2F_L1_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l1_64_shadow))
-#define SH2F_FL1_64 (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_64_shadow))
-#define SH2F_L2_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l2_64_shadow))
-#define SH2F_L3_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l3_64_shadow))
-#define SH2F_L4_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l4_64_shadow))
-
-/* Used for hysteresis when automatically unhooking mappings on fork/exit */
-#define SH2F_unhooked_mappings (1u<<31)
-
-/*
- * Allocation of shadow pages
- */
-
-/* Return the minumum acceptable number of shadow pages a domain needs */
-unsigned int shadow2_min_acceptable_pages(struct domain *d);
-
-/* Set the pool of shadow pages to the required number of MB.
- * Input will be rounded up to at least min_acceptable_shadow_pages().
- * Returns 0 for success, 1 for failure. */
-unsigned int shadow2_set_allocation(struct domain *d,
- unsigned int megabytes,
- int *preempted);
-
-/* Return the size of the shadow2 pool, rounded up to the nearest MB */
-static inline unsigned int shadow2_get_allocation(struct domain *d)
-{
- unsigned int pg = d->arch.shadow2.total_pages;
- return ((pg >> (20 - PAGE_SHIFT))
- + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
-}
-
-/*
- * Linked list for chaining entries in the shadow hash table.
- */
-struct shadow2_hash_entry {
- struct shadow2_hash_entry *next;
- mfn_t smfn; /* MFN of the shadow */
-#ifdef _x86_64_ /* Shorten 'n' so we don't waste a whole word on storing 't' */
- unsigned long n:56; /* MFN of guest PT or GFN of guest superpage */
-#else
- unsigned long n; /* MFN of guest PT or GFN of guest superpage */
-#endif
- unsigned char t; /* shadow type bits, or 0 for empty */
-};
-
-#define SHADOW2_HASH_BUCKETS 251
-/* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
-
-
-#if SHADOW2_OPTIMIZATIONS & SH2OPT_CACHE_WALKS
-/* Optimization: cache the results of guest walks. This helps with MMIO
- * and emulated writes, which tend to issue very similar walk requests
- * repeatedly. We keep the results of the last few walks, and blow
- * away the cache on guest cr3 write, mode change, or page fault. */
-
-#define SH2_WALK_CACHE_ENTRIES 4
-
-/* Rather than cache a guest walk, which would include mapped pointers
- * to pages, we cache what a TLB would remember about the walk: the
- * permissions and the l1 gfn */
-struct shadow2_walk_cache {
- unsigned long va; /* The virtual address (or 0 == unused) */
- unsigned long gfn; /* The gfn from the effective l1e */
- u32 permissions; /* The aggregated permission bits */
-};
-#endif
-
-
-/**************************************************************************/
-/* Guest physmap (p2m) support */
-
-/* Walk another domain's P2M table, mapping pages as we go */
-extern mfn_t
-sh2_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
-
-
-/* General conversion function from gfn to mfn */
-static inline mfn_t
-sh2_gfn_to_mfn(struct domain *d, unsigned long gfn)
-{
- if ( !shadow2_mode_translate(d) )
- return _mfn(gfn);
- else if ( likely(current->domain == d) )
- return _mfn(get_mfn_from_gpfn(gfn));
- else
- return sh2_gfn_to_mfn_foreign(d, gfn);
-}
-
-// vcpu-specific version of gfn_to_mfn(). This is where we hide the dirty
-// little secret that, for hvm guests with paging disabled, nearly all of the
-// shadow code actually think that the guest is running on *untranslated* page
-// tables (which is actually domain->phys_table).
-//
-static inline mfn_t
-sh2_vcpu_gfn_to_mfn(struct vcpu *v, unsigned long gfn)
-{
- if ( !shadow2_vcpu_mode_translate(v) )
- return _mfn(gfn);
- if ( likely(current->domain == v->domain) )
- return _mfn(get_mfn_from_gpfn(gfn));
- return sh2_gfn_to_mfn_foreign(v->domain, gfn);
-}
-
-static inline unsigned long
-sh2_mfn_to_gfn(struct domain *d, mfn_t mfn)
-{
- if ( shadow2_mode_translate(d) )
- return get_gpfn_from_mfn(mfn_x(mfn));
- else
- return mfn_x(mfn);
-}
-
-
-
-#endif /* _XEN_SHADOW2_H */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
-